In [1]:
import pandas as pd

In [2]:
# Build one Series per student record; the field names become the index labels
rec1 = pd.Series(dict(name='Aida', age=21))
rec2 = pd.Series(dict(name='Sara', age=22))
rec3 = pd.Series(dict(name='Saba', age=19))

In [3]:
# Stack the three Series as rows of a DataFrame and give each row a label
row_labels = ['student 1', 'student 2', 'student 3']
students = pd.DataFrame([rec1, rec2, rec3], index=row_labels)

In [4]:
# Preview the top of the frame (at most the first 5 rows)
students.head(n=5)

Unnamed: 0,name,age
student 1,Aida,21
student 2,Sara,22
student 3,Saba,19


In [5]:
# Get the Index object that holds the row labels of the DataFrame
students.index

Index(['student 1', 'student 2', 'student 3'], dtype='object')

In [6]:
# A DataFrame can also be built directly from a list of dictionaries:
# each dictionary is one row and its keys become the column names.

stds = [
    {'name': 'Aida', 'age': 21},
    {'name': 'Sara', 'age': 22},
    {'name': 'Saba', 'age': 19},
]
student_labels = ['student 1', 'student 2', 'student 3']
students = pd.DataFrame(stds, index=student_labels)
students.head()


Unnamed: 0,name,age
student 1,Aida,21
student 2,Sara,22
student 3,Saba,19


In [7]:
# Rename columns with an {old: new} mapping. The result is a new DataFrame;
# it is not assigned here, so `students` keeps its original column names.
students.rename({'name': 'Name'}, axis='columns')

Unnamed: 0,Name,age
student 1,Aida,21
student 2,Sara,22
student 3,Saba,19


In [8]:
# A function can be used instead of a mapping: it is applied to every column
# label. str.strip removes leading/trailing white space from each label.
students.rename(columns=str.strip)

Unnamed: 0,name,age
student 1,Aida,21
student 2,Sara,22
student 3,Saba,19


In [9]:
# Get the column labels of the DataFrame (returned as an Index object, not a plain list)
students.columns

Index(['name', 'age'], dtype='object')

In [10]:
# Use the 'age' column as the row index of the displayed result.
# The result is not assigned, so `students` itself keeps its original index.
students.set_index('age')

Unnamed: 0_level_0,name
age,Unnamed: 1_level_1
21,Aida
22,Sara
19,Saba


In [11]:
# Passing a list of column names builds the index from several columns at once

index_columns = ['name', 'age']
students.set_index(index_columns)

name,age
Aida,21
Sara,22
Saba,19


In [14]:
# Reset the index: the current row labels move into a regular column named
# 'index' and the rows are renumbered 0, 1, 2, ...
students.reset_index()

Unnamed: 0,index,name,age
0,student 1,Aida,21
1,student 2,Sara,22
2,student 3,Saba,19


In [13]:
# Get the distinct values of a single column (returned as an array)

students['age'].unique()

array([21, 22, 19])

In [15]:
# Filter rows with a boolean mask: only rows where the mask is True are kept

older_than_20 = students['age'] > 20
students[older_than_20]

Unnamed: 0,name,age
student 1,Aida,21
student 2,Sara,22


In [16]:
# where() keeps the shape of the DataFrame: rows that satisfy the condition are
# kept, and every value in the other rows is replaced with NaN

age_is_21 = students['age'] == 21
students.where(age_is_21)

Unnamed: 0,name,age
student 1,Aida,21.0
student 2,,
student 3,,


In [17]:
# dropna() discards the rows that where() filled with NaN,
# leaving only the rows that matched the condition

age_is_21 = students['age'] == 21
students.where(age_is_21).dropna()

Unnamed: 0,name,age
student 1,Aida,21.0


In [20]:
# To replace the missing values with 0 instead of dropping them, use fillna.
# Note that every column is filled, so the masked names become 0 as well.
masked_students = students.where(students['age'] == 21)
masked_students.fillna(0)

Unnamed: 0,name,age
student 1,Aida,21.0
student 2,0,0.0
student 3,0,0.0


In [21]:
# Return the rows ordered by their index labels. The labels here are already
# in order, so the result looks the same as the original DataFrame.
students.sort_index()

Unnamed: 0,name,age
student 1,Aida,21
student 2,Sara,22
student 3,Saba,19


In [23]:
# isin() on a column returns a boolean Series: True where the value of that
# row is one of the given values, False otherwise.

accepted_ages = [21, 18]
students['age'].isin(accepted_ages)

student 1     True
student 2    False
student 3    False
Name: age, dtype: bool

# extract data

we can extract data using the .iloc and .loc attributes

- they return a Series when only one row is selected,
    but selecting multiple rows returns a DataFrame
- we can quickly select data based on multiple axes, e.g. `students.loc['student 1', 'name']`
- if we just wanted to select a single column, we could transpose the DataFrame and use .loc with the column name

In [8]:
# Select a single row by its index label; a single row comes back as a Series
students.loc['student 1']

name    Aida
age       21
Name: student 1, dtype: object

In [9]:
# Transpose so that columns become rows, then select the 'name' row by label;
# this yields the original 'name' column as a Series
students.T.loc['name']

student 1    Aida
student 2    Sara
student 3    Saba
Name: name, dtype: object

In [12]:
# A column can also be selected directly with square brackets, without transposing
students['name']

student 1    Aida
student 2    Sara
student 3    Saba
Name: name, dtype: object

# read file 

In [11]:
# Read a CSV file into a DataFrame; index_col=0 uses the first column of the
# file as the row index instead of creating a new 0..n-1 index
admissions_csv = 'datasets/Admission_Predict.csv'
pd.read_csv(admissions_csv, index_col=0)

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.00,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.80
5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...
396,324,110,3,3.5,3.5,9.04,1,0.82
397,325,107,3,3.0,3.5,9.11,1,0.84
398,330,116,4,5.0,4.5,9.45,1,0.91
399,312,103,3,3.5,4.0,8.78,0,0.67


In [12]:
# Read tables from a web page. The result is a list of DataFrames rather than
# a single DataFrame. NOTE(review): presumably needs network access and an
# HTML parser installed — confirm before running offline.
pd.read_html("https://en.wikipedia.org/wiki/College_admissions_in_the_United_States")

[    0                                                  1
 0 NaN  This article has multiple issues. Please help ...
 1 NaN  This article contains instructions, advice, or...
 2 NaN  This article may contain an excessive amount o...,
     0                                                  1
 0 NaN  This article contains instructions, advice, or...,
     0                                                  1
 0 NaN  This article may contain an excessive amount o...,
     0                                                  1
 0 NaN  This overview section duplicates the intended ...,
    Unnamed: 0_level_0 Total (56 institutions)                     \
    Unnamed: 0_level_1      Unnamed: 1_level_1 Unnamed: 2_level_1   
     Admit Year (Fall)                    Apps             Admits   
 0                2001                  645111             198815   
 1                2002                  650908             202565   
 2                2003                  681989             206423   
 3