# Working with Pandas


### use of pandas
    - Primarily used to process structured data
    - Data import / export
    - Data exploration, statistical analysis
    - Data manipulation, wrangling, transformation
    - Data aggregation, data cleaning
    
    
### there are two primary data types in pandas
    - Pandas Series - 1D data
    - Pandas DataFrame - 2D data

In [1]:
import pandas as pd

### Pandas Series

In [2]:
ps = pd.Series([25,24,32,38,37,24],name='Age')
ps

0    25
1    24
2    32
3    38
4    37
5    24
Name: Age, dtype: int64

In [3]:
type(ps)

pandas.core.series.Series

In [4]:
## Pandas DataFrames
mydic = {"Name":['John','Jessy','Jason','Jinny','James','Joseph','Jacob'],
         "Age": [25,35,42,46,22,34,41],
         "Gender":['Male','Female','Male','Female','Male','Male','Male']}

df = pd.DataFrame(mydic)
df

Unnamed: 0,Name,Age,Gender
0,John,25,Male
1,Jessy,35,Female
2,Jason,42,Male
3,Jinny,46,Female
4,James,22,Male
5,Joseph,34,Male
6,Jacob,41,Male


In [5]:
type(df)

pandas.core.frame.DataFrame

In [6]:
print(df.shape)

(7, 3)


In [7]:
df['Name']

0      John
1     Jessy
2     Jason
3     Jinny
4     James
5    Joseph
6     Jacob
Name: Name, dtype: object

In [8]:
type(df['Name'])

pandas.core.series.Series

## Selection & Filtering

In [9]:
df.head() 

Unnamed: 0,Name,Age,Gender
0,John,25,Male
1,Jessy,35,Female
2,Jason,42,Male
3,Jinny,46,Female
4,James,22,Male


In [10]:
df.head(2)

Unnamed: 0,Name,Age,Gender
0,John,25,Male
1,Jessy,35,Female


In [11]:
df.tail(2)

Unnamed: 0,Name,Age,Gender
5,Joseph,34,Male
6,Jacob,41,Male


In [12]:
# access a column
df['Name'] # recommended

0      John
1     Jessy
2     Jason
3     Jinny
4     James
5    Joseph
6     Jacob
Name: Name, dtype: object

In [13]:
type(df['Name'])

pandas.core.series.Series

In [14]:
df.Name

0      John
1     Jessy
2     Jason
3     Jinny
4     James
5    Joseph
6     Jacob
Name: Name, dtype: object

In [15]:
df[['Name','Gender']]

Unnamed: 0,Name,Gender
0,John,Male
1,Jessy,Female
2,Jason,Male
3,Jinny,Female
4,James,Male
5,Joseph,Male
6,Jacob,Male


In [16]:
df[2:5]

Unnamed: 0,Name,Age,Gender
2,Jason,42,Male
3,Jinny,46,Female
4,James,22,Male


In [17]:
df[2:3]

Unnamed: 0,Name,Age,Gender
2,Jason,42,Male


In [19]:
# accessing one or many values
df.loc[2,:]

Name      Jason
Age          42
Gender     Male
Name: 2, dtype: object

In [22]:
df.loc[2,"Age"]

42

In [23]:
# iloc - accessing with index of rows and cols
df.iloc[2,1]

42

In [24]:
df.iloc[2:5,1:3]

Unnamed: 0,Age,Gender
2,42,Male
3,46,Female
4,22,Male


In [25]:
df[['Age','Gender']][2:5]

Unnamed: 0,Age,Gender
2,42,Male
3,46,Female
4,22,Male


### Filtering

In [26]:
df

Unnamed: 0,Name,Age,Gender
0,John,25,Male
1,Jessy,35,Female
2,Jason,42,Male
3,Jinny,46,Female
4,James,22,Male
5,Joseph,34,Male
6,Jacob,41,Male


In [34]:
df[3:5]

Unnamed: 0,Name,Age,Gender
3,Jinny,46,Female
4,James,22,Male


In [27]:
# numeric filter
df[df.Age<35]

Unnamed: 0,Name,Age,Gender
0,John,25,Male
4,James,22,Male
5,Joseph,34,Male


In [28]:
df.Age<35

0     True
1    False
2    False
3    False
4     True
5     True
6    False
Name: Age, dtype: bool

In [29]:
df[df.Gender=='Female']

Unnamed: 0,Name,Age,Gender
1,Jessy,35,Female
3,Jinny,46,Female


In [30]:
df[df.Gender!='Female']

Unnamed: 0,Name,Age,Gender
0,John,25,Male
2,Jason,42,Male
4,James,22,Male
5,Joseph,34,Male
6,Jacob,41,Male


In [31]:
df[df.Age>30][df.Gender=='Male']

  df[df.Age>30][df.Gender=='Male']


Unnamed: 0,Name,Age,Gender
2,Jason,42,Male
5,Joseph,34,Male
6,Jacob,41,Male


In [32]:
# AND logic
df[(df.Age>30) & (df.Gender=='Male')]

Unnamed: 0,Name,Age,Gender
2,Jason,42,Male
5,Joseph,34,Male
6,Jacob,41,Male


In [33]:
# OR logic
df[(df.Age>30) | (df.Gender=='Male')]

Unnamed: 0,Name,Age,Gender
0,John,25,Male
1,Jessy,35,Female
2,Jason,42,Male
3,Jinny,46,Female
4,James,22,Male
5,Joseph,34,Male
6,Jacob,41,Male
