In [1]:
from datetime import datetime

import numpy as np
import pandas as pd

# Introducing dtypes in Pandas

In [2]:
# Both columns are deliberately built from strings: 'Value' looks numeric
# but holds text, which is what the next cells explore.
df = pd.DataFrame(
    {
        'ID': list('ABCD'),
        'Value': list('1234'),
    }
)
df

Unnamed: 0,ID,Value
0,A,1
1,B,2
2,C,3
3,D,4


In [3]:
# Both columns are stored as generic Python objects (strings).
# Left as the cell's last expression so the notebook displays it,
# matching the bare `df.dtypes` used later on.
df.dtypes

ID       object
Value    object
dtype: object


In [4]:
# Raising a column of strings to a power is not defined, so this raises a
# TypeError. The error is the point of the cell, so it is caught and its
# message printed; an uncaught exception would stop "Restart & Run All".
try:
    df['Value'] ** 2
except TypeError as exc:
    print('{}: {}'.format(type(exc).__name__, exc))

TypeError: unsupported operand type(s) for ** or pow(): 'str' and 'int'

In [5]:
# On an object (string) column, `*` repeats each string instead of
# multiplying numbers: '1' becomes '11', not 2.
df['Value'] * 2

0    11
1    22
2    33
3    44
Name: Value, dtype: object

In [6]:
# Plain Python strings behave the same way: `*` repeats the string.
'hello' * 3

'hellohellohello'

# 1. Converting Data Types

In [7]:
# astype returns a converted Series; nothing is assigned here, so df
# itself still holds the original strings after this cell.
df['Value'].astype(dtype=int)

0    1
1    2
2    3
3    4
Name: Value, dtype: int32

In [8]:
# Make the conversion stick: convert only 'Value' to int and keep the result.
df = df.astype({'Value': int})

In [9]:
# Check the column dtypes after the 'Value' conversion.
df.dtypes

ID       object
Value     int32
dtype: object

## 1.1 Converting strings to datetime

In [10]:
# The dates are added as plain strings, so the new column starts out
# with object dtype rather than a datetime dtype.
df = df.assign(
    Date=['2019-04-23', '2019-04-24', '2019-04-25', '2019-04-26']
)
df

Unnamed: 0,ID,Value,Date
0,A,1,2019-04-23
1,B,2,2019-04-24
2,C,3,2019-04-25
3,D,4,2019-04-26


In [11]:
# 'Date' is still object (string) at this point.
# Left as the cell's last expression so the notebook displays it,
# matching the bare `df.dtypes` used earlier.
df.dtypes

ID       object
Value     int32
Date     object
dtype: object


In [12]:
# Save the frame next to the notebook. `index` is left at its default,
# so the row index is written as an unnamed first column.
df.to_csv(path_or_buf='dataframe2.csv')

### Method 1: `pd.to_datetime`

In [13]:
# Parse the string column into datetime64 values. The result is only
# displayed; df['Date'] is not overwritten.
pd.to_datetime(arg=df['Date'])

0   2019-04-23
1   2019-04-24
2   2019-04-25
3   2019-04-26
Name: Date, dtype: datetime64[ns]

### Method 2: `Series.astype`

In [14]:
# Ask astype for the target dtype directly; the strings are parsed as dates.
df['Date'].astype(dtype='datetime64[ns]')

0   2019-04-23
1   2019-04-24
2   2019-04-25
3   2019-04-26
Name: Date, dtype: datetime64[ns]

### Method 3: `datetime.strptime` with `apply`

In [15]:
# Parse each string individually with the standard-library
# datetime.strptime and an explicit format. `datetime` is imported in
# the notebook's first cell together with the other imports.
df['Date'].apply(lambda date_string: datetime.strptime(date_string, '%Y-%m-%d'))

0   2019-04-23
1   2019-04-24
2   2019-04-25
3   2019-04-26
Name: Date, dtype: datetime64[ns]

# 2. Pivoting data

In [16]:
# Head-count per company and department, written one record per row.
# Note that `df` is rebound here to a new, unrelated frame.
df = pd.DataFrame(
    [
        ('Apple', 'IT', 100),
        ('Apple', 'Finance', 80),
        ('Apple', 'R&D', 60),
        ('Microsoft', 'IT', 110),
        ('Microsoft', 'Finance', 70),
        ('Microsoft', 'R&D', 82),
        ('Google', 'IT', 90),
        ('Google', 'Finance', 59),
        ('Google', 'R&D', 143),
    ],
    columns=['Company', 'Department', 'Employees'],
)

df

Unnamed: 0,Company,Department,Employees
0,Apple,IT,100
1,Apple,Finance,80
2,Apple,R&D,60
3,Microsoft,IT,110
4,Microsoft,Finance,70
5,Microsoft,R&D,82
6,Google,IT,90
7,Google,Finance,59
8,Google,R&D,143


In [17]:
# Save the frame next to the notebook. `index` is left at its default,
# so the row index is written as an unnamed first column.
df.to_csv(path_or_buf='dataframe3.csv')

In [18]:
# One row per company, one column per department. The aggregation is
# spelled out: 'mean' is pivot_table's default, and because every
# company/department pair appears exactly once in this data it simply
# carries the single value through.
df.pivot_table(
    index='Company',
    columns='Department',
    values='Employees',
    aggfunc='mean',
)

Department,Finance,IT,R&D
Company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Apple,80,100,60
Google,59,90,143
Microsoft,70,110,82


# 3. Conditional creation of columns

In [19]:
# Element-wise "greater than or equal" (the method form of `>= 100`):
# gives a boolean Series with one True/False per row.
df['Employees'].ge(100)

0     True
1    False
2    False
3     True
4    False
5    False
6    False
7    False
8     True
Name: Employees, dtype: bool

In [20]:
# np.where picks 'Big' where the condition holds and 'Small' elsewhere;
# the resulting array becomes the new 'Size_indicator' column.
df = df.assign(
    Size_indicator=np.where(df['Employees'] >= 100, 'Big', 'Small')
)
df

Unnamed: 0,Company,Department,Employees,Size_indicator
0,Apple,IT,100,Big
1,Apple,Finance,80,Small
2,Apple,R&D,60,Small
3,Microsoft,IT,110,Big
4,Microsoft,Finance,70,Small
5,Microsoft,R&D,82,Small
6,Google,IT,90,Small
7,Google,Finance,59,Small
8,Google,R&D,143,Big


In [21]:
# The same labelling as the np.where version, written as a list
# comprehension that walks the 'Employees' column one value at a time.
df['Size_indicator'] = [
    'Big' if headcount >= 100 else 'Small'
    for headcount in df['Employees']
]

In [22]:
# Show the frame again after rebuilding 'Size_indicator' with the list
# comprehension; the labels match the np.where result.
df

Unnamed: 0,Company,Department,Employees,Size_indicator
0,Apple,IT,100,Big
1,Apple,Finance,80,Small
2,Apple,R&D,60,Small
3,Microsoft,IT,110,Big
4,Microsoft,Finance,70,Small
5,Microsoft,R&D,82,Small
6,Google,IT,90,Small
7,Google,Finance,59,Small
8,Google,R&D,143,Big


In [23]:
# Long-form version: build the labels one at a time with an explicit loop.
lst = []
for x in range(10):
    # x % 2 is 1 (truthy) for odd numbers and 0 (falsy) for even ones.
    if x % 2:
        lst.append('uneven')
    else:
        lst.append('even')

lst

['even',
 'uneven',
 'even',
 'uneven',
 'even',
 'uneven',
 'even',
 'uneven',
 'even',
 'uneven']

In [24]:
# The same even/uneven labelling written as a single list comprehension.
['even' if x % 2 == 0 else 'uneven' for x in range(10)]

['even',
 'uneven',
 'even',
 'uneven',
 'even',
 'uneven',
 'even',
 'uneven',
 'even',
 'uneven']

# 4. Aggregation with multiple functions

In [25]:
# Per company: total and average head-count, plus the number of rows
# counted through 'Size_indicator'. A dict maps each column to the
# aggregation(s) applied to it.
df.groupby(by='Company').agg(
    {
        'Employees': ['sum', 'mean'],
        'Size_indicator': 'count',
    }
)

Unnamed: 0_level_0,Employees,Employees,Size_indicator
Unnamed: 0_level_1,sum,mean,count
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Apple,240,80.0,3
Google,292,97.333333,3
Microsoft,262,87.333333,3
