# Working with pandas

    - Data import / export
    - Data manipulation - filtering, selection
    - Data aggregation, statistical analysis
    - Data wrangling, transformation
    - Data cleaning
    
    
Pandas has two core data structures

    1. Pandas Series - 1D data
    2. Pandas DataFrame - 2D data

In [1]:
import pandas as pd

In [2]:
# Build a one-dimensional Series of ages; `name` becomes the Series label
# shown in the "Name: Age" footer of the output.
ps = pd.Series(
    data=[25, 36, 28, 45, 26],
    name="Age",
)
ps

0    25
1    36
2    28
3    45
4    26
Name: Age, dtype: int64

In [4]:
type(ps)

pandas.core.series.Series

In [3]:
# Column-oriented source data: each key becomes a column, and each list
# supplies that column's values (all lists have the same length).
mydict = dict(
    Name=["James", "Jane", "Jenny", "John", "Jason", "Jack", "Jessy", "Johny"],
    Age=[25, 12, 35, 36, 45, 42, 32, 26],
    Gender=["Male", "Female", "Female", "Male", "Male", "Male", "Female", "Male"],
)

# Build the two-dimensional DataFrame from the dict of columns.
df = pd.DataFrame(data=mydict)
df

Unnamed: 0,Name,Age,Gender
0,James,25,Male
1,Jane,12,Female
2,Jenny,35,Female
3,John,36,Male
4,Jason,45,Male
5,Jack,42,Male
6,Jessy,32,Female
7,Johny,26,Male


In [5]:
type(df)

pandas.core.frame.DataFrame

In [6]:
df['Name']

0    James
1     Jane
2    Jenny
3     John
4    Jason
5     Jack
6    Jessy
7    Johny
Name: Name, dtype: object

In [7]:
type(df['Name'])

pandas.core.series.Series

## Selection 

In [8]:
df

Unnamed: 0,Name,Age,Gender
0,James,25,Male
1,Jane,12,Female
2,Jenny,35,Female
3,John,36,Male
4,Jason,45,Male
5,Jack,42,Male
6,Jessy,32,Female
7,Johny,26,Male


In [9]:
df.head()

Unnamed: 0,Name,Age,Gender
0,James,25,Male
1,Jane,12,Female
2,Jenny,35,Female
3,John,36,Male
4,Jason,45,Male


In [10]:
df.head(2)

Unnamed: 0,Name,Age,Gender
0,James,25,Male
1,Jane,12,Female


In [11]:
df.tail(3)

Unnamed: 0,Name,Age,Gender
5,Jack,42,Male
6,Jessy,32,Female
7,Johny,26,Male


In [12]:
# selecting columns
# attribute-style access: only works when the column name is a valid Python
# identifier and does not clash with an existing DataFrame attribute/method
df.Name

0    James
1     Jane
2    Jenny
3     John
4    Jason
5     Jack
6    Jessy
7    Johny
Name: Name, dtype: object

In [13]:
df['Name'] # recommended

0    James
1     Jane
2    Jenny
3     John
4    Jason
5     Jack
6    Jessy
7    Johny
Name: Name, dtype: object

In [14]:
df[['Name','Gender']]

Unnamed: 0,Name,Gender
0,James,Male
1,Jane,Female
2,Jenny,Female
3,John,Male
4,Jason,Male
5,Jack,Male
6,Jessy,Female
7,Johny,Male


In [15]:
# selecting rows
# a plain slice on the DataFrame is positional: rows 2..5, the end (6) is excluded
df[2:6]

Unnamed: 0,Name,Age,Gender
2,Jenny,35,Female
3,John,36,Male
4,Jason,45,Male
5,Jack,42,Male


In [16]:
# .loc slices by index label and includes BOTH endpoints (labels 2 through 6)
df.loc[2:6,'Age']

2    35
3    36
4    45
5    42
6    32
Name: Age, dtype: int64

In [17]:
# .iloc slices by integer position, end excluded: rows 2..5, columns 1..2 (Age, Gender)
df.iloc[2:6,1:3]

Unnamed: 0,Age,Gender
2,35,Female
3,36,Male
4,45,Male
5,42,Male


In [19]:
# chained selection: slice rows at positions 2..5 first, then pick the 'Age' column
df[2:6]['Age']

2    35
3    36
4    45
5    42
Name: Age, dtype: int64

In [20]:
df

Unnamed: 0,Name,Age,Gender
0,James,25,Male
1,Jane,12,Female
2,Jenny,35,Female
3,John,36,Male
4,Jason,45,Male
5,Jack,42,Male
6,Jessy,32,Female
7,Johny,26,Male


In [21]:
df.index

RangeIndex(start=0, stop=8, step=1)

In [22]:
# Replace the default RangeIndex with custom integer row labels 102..109
# (one label per row, so the list length must match the number of rows).
df.index = list(range(102, 110))
df

Unnamed: 0,Name,Age,Gender
102,James,25,Male
103,Jane,12,Female
104,Jenny,35,Female
105,John,36,Male
106,Jason,45,Male
107,Jack,42,Male
108,Jessy,32,Female
109,Johny,26,Male


In [23]:
# after re-labelling the index, .loc must use the new labels (104 through 108, both ends included)
df.loc[104:108,'Age']

104    35
105    36
106    45
107    42
108    32
Name: Age, dtype: int64

In [24]:
# .iloc ignores the index labels: still rows at positions 2..5 and columns 1..2
df.iloc[2:6,1:3]

Unnamed: 0,Age,Gender
104,35,Female
105,36,Male
106,45,Male
107,42,Male


## Filtering

In [25]:
df

Unnamed: 0,Name,Age,Gender
102,James,25,Male
103,Jane,12,Female
104,Jenny,35,Female
105,John,36,Male
106,Jason,45,Male
107,Jack,42,Male
108,Jessy,32,Female
109,Johny,26,Male


In [35]:
# numeric filter
df[df.Age<=30]

Unnamed: 0,Name,Age,Gender
102,James,25,Male
103,Jane,12,Female
109,Johny,26,Male


In [36]:
# numeric filter - same condition, using bracket-style column access
df[df['Age']<=30]

Unnamed: 0,Name,Age,Gender
102,James,25,Male
103,Jane,12,Female
109,Johny,26,Male


In [34]:
# the comparison on its own yields a boolean Series (one True/False per row);
# passing it to df[...] keeps only the rows marked True
df.Age<=30

102     True
103     True
104    False
105    False
106    False
107    False
108    False
109     True
Name: Age, dtype: bool

In [29]:
# categorical filter - keep rows whose Gender equals 'Male'
df[df.Gender=='Male']

Unnamed: 0,Name,Age,Gender
102,James,25,Male
105,John,36,Male
106,Jason,45,Male
107,Jack,42,Male
109,Johny,26,Male


In [30]:
# categorical filter - negated condition: keep rows whose Gender is not 'Male'
df[df.Gender!='Male']

Unnamed: 0,Name,Age,Gender
103,Jane,12,Female
104,Jenny,35,Female
108,Jessy,32,Female


In [31]:
# combining conditions - AND (two-step version)
# Filter in two stages and build the second mask from the already-filtered
# frame. Writing df[df.Gender=='Male'][df.Age>30] instead applies a mask
# computed on the full `df` to the smaller filtered frame, which makes pandas
# re-align the mask and emit
# "UserWarning: Boolean Series key will be reindexed to match DataFrame index".
males = df[df.Gender=='Male']
males[males.Age>30]


Unnamed: 0,Name,Age,Gender
105,John,36,Male
106,Jason,45,Male
107,Jack,42,Male


In [32]:
# combining conditions - AND (single combined mask)
# each comparison needs its own parentheses because `&` binds tighter than `>` and `==`
df[ (df.Age>30) & (df.Gender=='Male') ]

Unnamed: 0,Name,Age,Gender
105,John,36,Male
106,Jason,45,Male
107,Jack,42,Male


In [33]:
# combining conditions - OR
# each comparison needs its own parentheses because `|` binds tighter than `>` and `==`
df[ (df.Age>30) | (df.Gender=='Male') ]

Unnamed: 0,Name,Age,Gender
102,James,25,Male
104,Jenny,35,Female
105,John,36,Male
106,Jason,45,Male
107,Jack,42,Male
108,Jessy,32,Female
109,Johny,26,Male
