## Pandas

In [1]:
import pandas as pd 
import numpy as np

### Series - One dimensional array

In [6]:
# A plain Python list of names turned into a Series; with no index given,
# pandas labels the entries 0, 1, 2.
a = ['Alex', 'Jack', 'Jeff']

dt = pd.Series(data=a)
dt

0    Alex
1    Jack
2    Jeff
dtype: object

In [7]:
# Same construction, but the last entry is missing (None) in a list of strings.
a = ['Alex', 'Jack', None]

dt = pd.Series(data=a)
dt

0    Alex
1    Jack
2    None
dtype: object

In [20]:
# With numeric data the missing entry (None) is stored as NaN, which forces
# the whole Series to a floating-point dtype.
a = [1, 2, None]

dt = pd.Series(data=a)
dt

0    1.0
1    2.0
2    NaN
dtype: float64

#### NaN --> Not a number

In [10]:
np.nan == np.nan

False

In [11]:
np.nan == None

False

In [16]:
np.isnan(np.nan)

True

In [24]:
# Keep only the non-missing entries. dropna() works for any dtype, whereas
# np.isnan() raises TypeError on an object-dtype Series (e.g. once `dt` has
# been re-bound to a Series of strings), so this cell stays safe to re-run.
dt.dropna()

0    1.0
1    2.0
dtype: float64

In [25]:
# Map each person's name to an e-mail address.
dic = {
    'Alex': 'Alex@gmail.com',
    'John': 'John@gmail.com',
    'Jeff': 'Jeff@gmail.com',
}

In [27]:
dt =  pd.Series(dic)
dt

Alex    Alex@gmail.com
John    John@gmail.com
Jeff    Jeff@gmail.com
dtype: object

In [28]:
dt.index

Index(['Alex', 'John', 'Jeff'], dtype='object')

In [29]:
dt['Alex']

'Alex@gmail.com'

In [30]:
# Each element is a tuple, so every Series value holds a whole pair of names.
data = [
    ('Alex', 'Nick'),
    ('Jack', 'Adam'),
    ('Andrew', 'Jeff'),
]

pd.Series(data=data)

0      (Alex, Nick)
1      (Jack, Adam)
2    (Andrew, Jeff)
dtype: object

In [31]:
pd.Series(['Physics','Chemistry','Biology'], index = ['Alex','Andrew','Nick'])

Alex        Physics
Andrew    Chemistry
Nick        Biology
dtype: object

In [33]:
dic = {
    'Alex': 'Alex@gmail.com',
    'John': 'John@gmail.com',
    'Jeff': 'Jeff@gmail.com',
}

# An explicit index picks dict entries by label: a label with no matching key
# ('Sam') gets a missing value, and a key that is not listed ('Jeff') is left out.
data = pd.Series(data=dic, index=['Alex', 'John', 'Sam'])

In [39]:
data

Alex    Alex@gmail.com
John    John@gmail.com
Sam                NaN
dtype: object

### iloc fetches a row by its integer position (0, 1, 2, ...)
### loc fetches a row by its index label (a string here, but a label can be of any type)

In [42]:
data.iloc[1] # 1 is index , iloc for number

'John@gmail.com'

In [56]:
# `data` is labelled with strings, so a positional lookup must go through .iloc.
# Plain data[1] relied on pandas falling back to positions for integer keys,
# which is deprecated (pandas 2.1) in favour of treating the key as a label.
data.iloc[1]

'John@gmail.com'

In [37]:
data.loc['John'] # John is index, loc for string

'John@gmail.com'

In [None]:
data['John']

In [57]:
# Index labels need not share one type: three integer keys and one string key.
student_data = {90: 'Physics', 80: 'Chemistry', 100: 'Biology', 'ABC': 'XYV'}

st = pd.Series(data=student_data)

In [62]:
st.iloc[3]

'XYV'

## Add rows to a series

In [63]:
# Start from three values on the default 0..2 index.
s = pd.Series(data=[1, 2, 3])

# Assigning through .loc to a label that does not exist yet adds a new row.
s.loc['Add'] = 8
s

0      1
1      2
2      3
Add    8
dtype: int64

### Append two series

In [73]:
dic = {'Alex': 'Alex@gmail.com', 'John': 'John@gmail.com', 'Jeff': 'Jeff@gmail.com'}
# Three addresses deliberately sharing the same label, to show that index
# labels do not have to be unique.
dic1 = pd.Series(['Abc@gmail.com', 'xcy@gmail.com', 'pop@gmail.com'],
                 index=['kelly', 'kelly', 'kelly'])

d1 = pd.Series(dic)
d2 = pd.Series(dic1)  # dic1 is already a Series; re-wrapping yields an equivalent one

# Series.append() was removed in pandas 2.0; pd.concat() is the supported way
# to stack Series end to end. Index labels (duplicates included) are kept as-is.
all_dic = pd.concat([d1, d2])

In [74]:
all_dic

Alex     Alex@gmail.com
John     John@gmail.com
Jeff     Jeff@gmail.com
kelly     Abc@gmail.com
kelly     xcy@gmail.com
kelly     pop@gmail.com
dtype: object

In [76]:
all_dic.loc['kelly']

kelly    Abc@gmail.com
kelly    xcy@gmail.com
kelly    pop@gmail.com
dtype: object

## DataFrame

In [80]:
# One Series per student; the dict keys ('Name', 'Class', 'Score') become the
# index labels of each record.
record1 = pd.Series(data={'Name': 'Alice', 'Class': 'Physics', 'Score': 85})
record2 = pd.Series(data={'Name': 'Jack', 'Class': 'Chemistry', 'Score': 82})
record3 = pd.Series(data={'Name': 'Helen', 'Class': 'Biology', 'Score': 90})


In [81]:
pd.DataFrame([record1,record2,record3],index = ['Student1','Student2','Student3'])

Unnamed: 0,Name,Class,Score
Student1,Alice,Physics,85
Student2,Jack,Chemistry,82
Student3,Helen,Biology,90


In [120]:
# Row-oriented input: each dict is one row and its keys become the columns.
students = [
    {'Name': 'Alice', 'Class': 'Physics', 'Score': 85},
    {'Name': 'Jack', 'Class': 'Chemistry', 'Score': 82},
    {'Name': 'Helen', 'Class': 'Biology', 'Score': 90},
]

# Explicit row labels replace the default 0..n-1 index.
df = pd.DataFrame(data=students, index=['Student10', 'Student2', 'Student3'])

In [89]:
df

Unnamed: 0,Name,Class,Score
Student10,Alice,Physics,85
Student2,Jack,Chemistry,82
Student3,Helen,Biology,90


In [90]:
df.loc['Student10']

Name       Alice
Class    Physics
Score         85
Name: Student10, dtype: object

In [91]:
df.loc['Student10','Name']

'Alice'

In [95]:
df.loc[:,'Name']

Student10    Alice
Student2      Jack
Student3     Helen
Name: Name, dtype: object

In [98]:
df1 = df.T

In [101]:
df1

Unnamed: 0,Student10,Student2,Student3
Name,Alice,Jack,Helen
Class,Physics,Chemistry,Biology
Score,85,82,90


In [102]:
# Select row 'Name' and column 'Student10' in a single .loc call instead of
# chaining two lookups, which first materialises the whole row as a Series.
df1.loc['Name', 'Student10']

'Alice'

In [104]:
df1.loc[:,['Student2','Student3']]

Unnamed: 0,Student2,Student3
Name,Jack,Helen
Class,Chemistry,Biology
Score,82,90


### Drop

In [119]:
df

Unnamed: 0,Class,Score
Student10,Physics,85
Student2,Chemistry,82
Student3,Biology,90


In [123]:
# axis=0 addresses rows (index labels); axis=1 addresses columns.
# `columns=` is the explicit spelling of axis=1. Re-binding `df` instead of
# using inplace=True, together with errors='ignore', lets this cell be re-run
# without a KeyError once 'Name' has already been removed.
df = df.drop(columns='Name', errors='ignore')

In [124]:
df

Unnamed: 0,Class,Score
Student10,Physics,85
Student2,Chemistry,82
Student3,Biology,90


In [125]:
# Remove the 'Class' column. Unlike `del df['Class']`, this does not raise
# KeyError when the cell is executed a second time and the column is gone.
df = df.drop(columns='Class', errors='ignore')

In [126]:
df

Unnamed: 0,Score
Student10,85
Student2,82
Student3,90


### Add new columns in dataframe

In [129]:
df['Rank'] = None
df

Unnamed: 0,Score,Rank
Student10,85,
Student2,82,
Student3,90,


In [130]:
### Tip: press Shift + Tab with the cursor inside a function call to view its signature and docstring

In [132]:
df = pd.read_csv('datasets/Admission_Predict.csv')

df

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.00,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.80
4,5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...,...
395,396,324,110,3,3.5,3.5,9.04,1,0.82
396,397,325,107,3,3.0,3.5,9.11,1,0.84
397,398,330,116,4,5.0,4.5,9.45,1,0.91
398,399,312,103,3,3.5,4.0,8.78,0,0.67


In [133]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [136]:
df = pd.read_csv('datasets/Admission_Predict.csv',index_col = 0)
df.head()

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.0,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.8
5,314,103,2,2.0,3.0,8.21,0,0.65


### rename

In [139]:
# rename() returns a new DataFrame; the result is not assigned, so `df` keeps
# its original column names. Mapping keys that match no column are skipped
# silently (rename's default errors='ignore').
# NOTE(review): 'LOR' appears not to match here because the actual label is
# 'LOR ' with a trailing space — confirm with df.columns.
df.rename(columns = {'SOP':'Statement of Purpose','LOR':'Letter of Recommendation'})

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,Statement of Purpose,LOR,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.00,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.80
5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...
396,324,110,3,3.5,3.5,9.04,1,0.82
397,325,107,3,3.0,3.5,9.11,1,0.84
398,330,116,4,5.0,4.5,9.45,1,0.91
399,312,103,3,3.5,4.0,8.78,0,0.67


In [140]:
df.columns

Index(['GRE Score', 'TOEFL Score', 'University Rating', 'SOP', 'LOR ', 'CGPA',
       'Research', 'Chance of Admit '],
      dtype='object')

In [142]:
# Run str.strip over every column label to remove surrounding whitespace.
# The cleaned frame is bound to new_df; df itself is not modified.
new_df = df.rename(columns=str.strip)
new_df.columns

Index(['GRE Score', 'TOEFL Score', 'University Rating', 'SOP', 'LOR', 'CGPA',
       'Research', 'Chance of Admit'],
      dtype='object')

In [144]:
# Normalise every column label to lower case with no leading or trailing
# whitespace, then write the cleaned labels back onto the frame.
dt = df.columns

rename_cols = list(map(lambda label: label.lower().strip(), dt))
df.columns = rename_cols

In [145]:
df.columns

Index(['gre score', 'toefl score', 'university rating', 'sop', 'lor', 'cgpa',
       'research', 'chance of admit'],
      dtype='object')

In [147]:
df.rename(columns = {'sop':'Statement of Purpose','lor':'Letter of Recommendation'})

Unnamed: 0_level_0,gre score,toefl score,university rating,Statement of Purpose,Letter of Recommendation,cgpa,research,chance of admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.00,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.80
5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...
396,324,110,3,3.5,3.5,9.04,1,0.82
397,325,107,3,3.0,3.5,9.11,1,0.84
398,330,116,4,5.0,4.5,9.45,1,0.91
399,312,103,3,3.5,4.0,8.78,0,0.67
