# PANDAS DATA STRUCTURES 
###### 1. SERIES ----> A SERIES OF DATA(1 DIMENSIONAL)
###### 2. DATAFRAMES ----> TABULAR DATA(2 DIMENSIONAL)

In [1]:
import pandas as pd

## SERIES

In [2]:
marks = pd.Series([80,70,60])
marks

0    80
1    70
2    60
dtype: int64

In [3]:
marks[1]

70

In [4]:
# specifying indexes
marks = pd.Series([80,70,60],index = ['ali','sara','ahmed'])
marks

ali      80
sara     70
ahmed    60
dtype: int64

In [5]:
marks['ali']

80

In [6]:
# specifying datatype
age = pd.Series([80,70,60],index = ['ali','sara','ahmed'],dtype = float)
age

ali      80.0
sara     70.0
ahmed    60.0
dtype: float64

In [7]:
# filtering
age[age>70]

ali    80.0
dtype: float64

In [8]:
# fancy indexing
age[['sara','ali']]

sara    70.0
ali     80.0
dtype: float64

In [9]:
# changing a value
age['sara'] = 18
age

ali      80.0
sara     18.0
ahmed    60.0
dtype: float64

In [10]:
# creating series from dictionary
age = pd.Series({'ali':80,'sara':70,'ahmed':60})
age

ali      80
sara     70
ahmed    60
dtype: int64

In [11]:
# checking null value
age = pd.Series({'ali':80,'sara':70,'ahmed':None})
age

ali      80.0
sara     70.0
ahmed     NaN
dtype: float64

In [12]:
pd.isnull(age)

ali      False
sara     False
ahmed     True
dtype: bool

In [13]:
age.mean()

75.0

In [14]:
# adding two series
age1 = pd.Series([100,50],index = ['ali','ahmed'])

In [15]:
display(age1)
display(age)

ali      100
ahmed     50
dtype: int64

ali      80.0
sara     70.0
ahmed     NaN
dtype: float64

In [16]:
s = age + age1
s

ahmed      NaN
ali      180.0
sara       NaN
dtype: float64

## DATAFRAMES

In [17]:
# Students table: each dict key becomes a column label and each list
# supplies that column's values, one entry per row.
_student_columns = {
    'NAMES': ['SARAH', 'AHMED', 'ALI'],
    'AGE': [50, 60, 70],
    'CGPA': [3.4, 2.9, 3.6],
    'ADDRESS': ['KARACHI', 'LAHORE', 'ISLAMABAD'],
}
stds = pd.DataFrame(data=_student_columns)
stds

Unnamed: 0,NAMES,AGE,CGPA,ADDRESS
0,SARAH,50,3.4,KARACHI
1,AHMED,60,2.9,LAHORE
2,ALI,70,3.6,ISLAMABAD


In [18]:
stds.columns

Index(['NAMES', 'AGE', 'CGPA', 'ADDRESS'], dtype='object')

In [19]:
stds['NAMES']

0    SARAH
1    AHMED
2      ALI
Name: NAMES, dtype: object

In [20]:
stds[['NAMES', 'AGE', 'CGPA']]

Unnamed: 0,NAMES,AGE,CGPA
0,SARAH,50,3.4
1,AHMED,60,2.9
2,ALI,70,3.6


In [21]:
stds[stds['AGE']>50]

Unnamed: 0,NAMES,AGE,CGPA,ADDRESS
1,AHMED,60,2.9,LAHORE
2,ALI,70,3.6,ISLAMABAD


In [25]:
# ADDING COLUMN
import numpy as np
# Number the rows 1..N using the frame's own length instead of a hard-coded
# upper bound, so the assignment still matches if rows are added or removed.
stds['SEMESTER'] = np.arange(1, len(stds) + 1)
stds

Unnamed: 0,NAMES,AGE,CGPA,ADDRESS,semester,SEMESTER
0,SARAH,50,3.4,KARACHI,1,1
1,AHMED,60,2.9,LAHORE,2,2
2,ALI,70,3.6,ISLAMABAD,3,3


In [26]:
stds[['SEMESTER','NAMES']]

Unnamed: 0,SEMESTER,NAMES
0,1,SARAH
1,2,AHMED
2,3,ALI


In [29]:
# SPECIFYING CUSTOM INDEX LABELS
# Passing index= labels the rows 'd1'/'d2' instead of the default 0/1.
_doctor_labels = ['d1', 'd2']
doctors = pd.DataFrame(
    {
        'names': ['asad', 'rahim'],
        'qualification': ['PhD', 'MS'],
    },
    index=_doctor_labels,
)

doctors

Unnamed: 0,names,qualification
d1,asad,PhD
d2,rahim,MS


In [38]:
# adding a column by assigning a single value (it is repeated for every row)
doctors['address'] = 'karachi'
doctors

Unnamed: 0,names,qualification,address
d1,asad,PhD,karachi
d2,rahim,MS,karachi


In [39]:
# ACCESSING ROW WISE DATA USING INDEX LABEL
doctors.loc['d1']

names               asad
qualification        PhD
address          karachi
Name: d1, dtype: object

In [40]:
# ACCESSING ROW WISE DATA USING INDEX LOCATION 
doctors.iloc[0]

names               asad
qualification        PhD
address          karachi
Name: d1, dtype: object

In [41]:
# DELETING COLUMN
del doctors['address']
doctors

Unnamed: 0,names,qualification
d1,asad,PhD
d2,rahim,MS


In [42]:
# DELETING COLUMN
del doctors['names']
doctors

Unnamed: 0,qualification
d1,PhD
d2,MS


In [44]:
doctors['names'] = 'TEST'
doctors

Unnamed: 0,qualification,names
d1,PhD,TEST
d2,MS,TEST


In [45]:
stds

Unnamed: 0,NAMES,AGE,CGPA,ADDRESS,semester,SEMESTER
0,SARAH,50,3.4,KARACHI,1,1
1,AHMED,60,2.9,LAHORE,2,2
2,ALI,70,3.6,ISLAMABAD,3,3


In [46]:
# TRANSPOSE
stds.T

Unnamed: 0,0,1,2
NAMES,SARAH,AHMED,ALI
AGE,50,60,70
CGPA,3.4,2.9,3.6
ADDRESS,KARACHI,LAHORE,ISLAMABAD
semester,1,2,3
SEMESTER,1,2,3


In [47]:
# SLICING IN A DATAFRAME
stds.iloc[::2]

Unnamed: 0,NAMES,AGE,CGPA,ADDRESS,semester,SEMESTER
0,SARAH,50,3.4,KARACHI,1,1
2,ALI,70,3.6,ISLAMABAD,3,3


In [48]:
# SELECTING THE LAST ROW BY POSITION (NEGATIVE INDEX)
stds.iloc[-1]

NAMES             ALI
AGE                70
CGPA              3.6
ADDRESS     ISLAMABAD
semester            3
SEMESTER            3
Name: 2, dtype: object

In [59]:
stds.iloc[:2]

Unnamed: 0,NAMES,AGE,CGPA,ADDRESS,semester,SEMESTER
0,SARAH,50,3.4,KARACHI,1,1
1,AHMED,60,2.9,LAHORE,2,2


In [61]:
stds.loc[:,'NAMES':'AGE']

Unnamed: 0,NAMES,AGE
0,SARAH,50
1,AHMED,60
2,ALI,70


In [49]:
# GENERATING NUMPY ARRAY FROM DATAFRAME
# to_numpy() is the accessor the pandas documentation recommends over the
# older .values attribute; it returns the same array here.
stds.to_numpy()

array([['SARAH', 50, 3.4, 'KARACHI', 1, 1],
       ['AHMED', 60, 2.9, 'LAHORE', 2, 2],
       ['ALI', 70, 3.6, 'ISLAMABAD', 3, 3]], dtype=object)

In [50]:
# REMOVING ROW
stds.drop([0],axis = 0)

Unnamed: 0,NAMES,AGE,CGPA,ADDRESS,semester,SEMESTER
1,AHMED,60,2.9,LAHORE,2,2
2,ALI,70,3.6,ISLAMABAD,3,3


In [51]:
# REMOVING COLUMN
# The result is a new frame that is only displayed, not assigned back to stds.
# errors='ignore' skips labels that are not present: only 'SEMESTER' is
# created in the cells shown in this notebook ('semester' presumably came
# from a cell that is no longer here), so without it a fresh run would raise
# KeyError on 'semester'.
stds.drop(['semester', 'SEMESTER'], axis=1, errors='ignore')

Unnamed: 0,NAMES,AGE,CGPA,ADDRESS
0,SARAH,50,3.4,KARACHI
1,AHMED,60,2.9,LAHORE
2,ALI,70,3.6,ISLAMABAD


#### ACCESSING DATAFRAME WITH DIFFERENT DATA STRUCTURES (ASSIGNMENT)


In [57]:
# CREATING DATAFRAME USING NUMPY
# np.array() on rows that mix str, int and float coerces every element to a
# string, which would turn AGE, CGPA and SEMESTER into text. dtype=object
# keeps each cell's original Python type, and infer_objects() then gives the
# numeric columns proper numeric dtypes so comparisons and maths work.
arr = np.array(
    [
        ['SARAH', 50, 3.4, 'KARACHI', 1],
        ['AHMED', 60, 2.9, 'LAHORE', 2],
        ['ALI', 70, 3.6, 'ISLAMABAD', 3],
        ['SHOAIB', 37, 2.9, 'PINDI', 4],
    ],
    dtype=object,
)
df = pd.DataFrame(
    arr, columns=['NAME', 'AGE', 'CGPA', 'CITY', 'SEMESTER']
).infer_objects()
df

Unnamed: 0,NAME,AGE,CGPA,CITY,SEMESTER
0,SARAH,50,3.4,KARACHI,1
1,AHMED,60,2.9,LAHORE,2
2,ALI,70,3.6,ISLAMABAD,3
3,SHOAIB,37,2.9,PINDI,4


In [58]:
# CREATING DATAFRAME USING LIST
# Every inner list is one row; the column labels are supplied separately.
LIST = [
    ['SARAH', 50, 3.4, 'KARACHI', 1],
    ['AHMED', 60, 2.9, 'LAHORE', 2],
    ['ALI', 70, 3.6, 'ISLAMABAD', 3],
    ['SHOAIB', 37, 2.9, 'PINDI', 4],
]
_letter_labels = ['A', 'B', 'C', 'D', 'E']
df = pd.DataFrame(data=LIST, columns=_letter_labels)
df

Unnamed: 0,A,B,C,D,E
0,SARAH,50,3.4,KARACHI,1
1,AHMED,60,2.9,LAHORE,2
2,ALI,70,3.6,ISLAMABAD,3
3,SHOAIB,37,2.9,PINDI,4


In [92]:
# CREATING DATAFRAME USING TUPLE
# A tuple of row tuples works the same way as a list of lists.
TUPLE = (
    ('SARAH', 50, 3.4, 'KARACHI', 1),
    ('AHMED', 60, 2.9, 'LAHORE', 2),
    ('ALI', 70, 3.6, 'ISLAMABAD', 3),
    ('SHOAIB', 37, 2.9, 'PINDI', 4),
)
_student_labels = ['NAME', 'AGE', 'CGPA', 'CITY', 'SEMESTER']
df = pd.DataFrame(data=TUPLE, columns=_student_labels)
df

Unnamed: 0,NAME,AGE,CGPA,CITY,SEMESTER
0,SARAH,50,3.4,KARACHI,1
1,AHMED,60,2.9,LAHORE,2
2,ALI,70,3.6,ISLAMABAD,3
3,SHOAIB,37,2.9,PINDI,4


In [62]:
# CHANGING COLUMN NAME 
df.columns = ['NAMES','AGE','CGPA','CITY','SEMESTER']

In [64]:
df

Unnamed: 0,NAMES,AGE,CGPA,CITY,SEMESTER
0,SARAH,50,3.4,KARACHI,1
1,AHMED,60,2.9,LAHORE,2
2,ALI,70,3.6,ISLAMABAD,3
3,SHOAIB,37,2.9,PINDI,4


##### APPLYING FUNCTION

In [68]:
def old_young(AGE):
    """Return 'old' when AGE is greater than 40, otherwise 'young'."""
    return 'old' if AGE > 40 else 'young'
df['AGE'].apply(old_young)

0      old
1      old
2      old
3    young
Name: AGE, dtype: object

In [69]:
# saving above result in dataframe column
df['STATUS'] = df['AGE'].apply(old_young)
df

Unnamed: 0,NAMES,AGE,CGPA,CITY,SEMESTER,STATUS
0,SARAH,50,3.4,KARACHI,1,old
1,AHMED,60,2.9,LAHORE,2,old
2,ALI,70,3.6,ISLAMABAD,3,old
3,SHOAIB,37,2.9,PINDI,4,young


In [71]:
# LAMBDA FUNCTION ------> ONE LINE FUNCTION
# Binds the name old_young to a lambda that uses the >40 threshold and
# returns the upper-case labels "OLD" / "YOUNG".
old_young = lambda AGE: "OLD" if AGE>40 else "YOUNG"
# Apply the lambda to every AGE value and store the labels in the STATUS column.
df['STATUS'] = df['AGE'].apply(old_young)
df


Unnamed: 0,NAMES,AGE,CGPA,CITY,SEMESTER,STATUS
0,SARAH,50,3.4,KARACHI,1,OLD
1,AHMED,60,2.9,LAHORE,2,OLD
2,ALI,70,3.6,ISLAMABAD,3,OLD
3,SHOAIB,37,2.9,PINDI,4,YOUNG


In [74]:
# ANOTHER EXAMPLE
# Small employee table; the column order follows the dict's insertion order.
_employee_columns = {
    'salary': [1000, 2000, 3000],
    'name': ['mark', 'jordan', 'yuaan'],
}
EMPLOYEE = pd.DataFrame(data=_employee_columns)
EMPLOYEE

Unnamed: 0,salary,name
0,1000,mark
1,2000,jordan
2,3000,yuaan


In [79]:
# Increment is 50 for salaries up to and including 1000, and 25 otherwise.
_salaries = EMPLOYEE['salary']
EMPLOYEE['increment'] = _salaries.apply(
    lambda pay: 50 if pay <= 1000 else 25
)
EMPLOYEE

Unnamed: 0,salary,name,increment
0,1000,mark,50
1,2000,jordan,25
2,3000,yuaan,25


### NUMPY FUNCTIONS IN DATAFRAME(ASSIGNMENT)

In [80]:
np.abs(EMPLOYEE['increment'])

0    50
1    25
2    25
Name: increment, dtype: int64

In [81]:
np.mean(EMPLOYEE['increment'])

33.333333333333336

In [82]:
np.sin(EMPLOYEE['increment'])

0   -0.262375
1   -0.132352
2   -0.132352
Name: increment, dtype: float64

In [83]:
np.cos(EMPLOYEE['increment'])

0    0.964966
1    0.991203
2    0.991203
Name: increment, dtype: float64

In [84]:
np.tan(EMPLOYEE['increment'])

0   -0.271901
1   -0.133526
2   -0.133526
Name: increment, dtype: float64

In [85]:
np.median(EMPLOYEE['increment'])

25.0

In [87]:
np.exp(EMPLOYEE['increment'])

0    5.184706e+21
1    7.200490e+10
2    7.200490e+10
Name: increment, dtype: float64

In [88]:
np.fabs(EMPLOYEE['increment'])

0    50.0
1    25.0
2    25.0
Name: increment, dtype: float64

In [89]:
np.square(EMPLOYEE['increment'])

0    2500
1     625
2     625
Name: increment, dtype: int64

In [90]:
np.sqrt(EMPLOYEE['increment'])

0    7.071068
1    5.000000
2    5.000000
Name: increment, dtype: float64