# Pandas 101

In [2]:
import pandas as pd

## Create dataframe

In [3]:
df = pd.DataFrame([[110, 2000, 35000],[100, 2500, 30000]])
df

Unnamed: 0,0,1,2
0,110,2000,35000
1,100,2500,30000


In [4]:
# DataFrame shape
df.shape

(2, 3)

In [5]:
# Include Column Names
df = pd.DataFrame([[110, 2000, 35000],[100, 2500, 30000]], columns=["Top Speed", "Weight", "Price"])
df

Unnamed: 0,Top Speed,Weight,Price
0,110,2000,35000
1,100,2500,30000


In [6]:
# Include row/index names
df = pd.DataFrame([[110, 2000, 35000],[100, 2500, 30000]], columns=["Top Speed", "Weight", "Price"], index=["Car A", "Car B"])
df

Unnamed: 0,Top Speed,Weight,Price
Car A,110,2000,35000
Car B,100,2500,30000


In [7]:
# Print index
df.index

Index(['Car A', 'Car B'], dtype='object')

## Create dataframe from Python Directory

In [9]:
# create from Python dictionary
weather_data = {
    'day' : ['5/1/2022', '5/2/2022','5/3/2022','5/4/2022','5/5/2022','5/6/2022'],
    'temperature' : [44,45,28,24,33,56],
    'windspeed' : [6,7,2,7,4,2],
    'event' : ['Rain','Sunny','Snow','Snow','Rain','Sunny']
}
df = pd.DataFrame(weather_data)
df

Unnamed: 0,day,temperature,windspeed,event
0,5/1/2022,44,6,Rain
1,5/2/2022,45,7,Sunny
2,5/3/2022,28,2,Snow
3,5/4/2022,24,7,Snow
4,5/5/2022,33,4,Rain
5,5/6/2022,56,2,Sunny


## DataFrame size

In [10]:
df.shape

(6, 4)

In [11]:
rows, cols = df.shape
print("Rows: ", rows)
print("Cols: ", cols)

Rows:  6
Cols:  4


## Print rows

In [12]:
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,5/1/2022,44,6,Rain
1,5/2/2022,45,7,Sunny
2,5/3/2022,28,2,Snow
3,5/4/2022,24,7,Snow
4,5/5/2022,33,4,Rain


In [13]:
df.head(2)

Unnamed: 0,day,temperature,windspeed,event
0,5/1/2022,44,6,Rain
1,5/2/2022,45,7,Sunny


In [14]:
df.tail(2)

Unnamed: 0,day,temperature,windspeed,event
4,5/5/2022,33,4,Rain
5,5/6/2022,56,2,Sunny


In [15]:
# Rows 2-4
df[2:5]

Unnamed: 0,day,temperature,windspeed,event
2,5/3/2022,28,2,Snow
3,5/4/2022,24,7,Snow
4,5/5/2022,33,4,Rain


In [16]:
# All rows
df[:]

Unnamed: 0,day,temperature,windspeed,event
0,5/1/2022,44,6,Rain
1,5/2/2022,45,7,Sunny
2,5/3/2022,28,2,Snow
3,5/4/2022,24,7,Snow
4,5/5/2022,33,4,Rain
5,5/6/2022,56,2,Sunny


## Print Columns

In [17]:
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [18]:
# Print individual column
df.day

0    5/1/2022
1    5/2/2022
2    5/3/2022
3    5/4/2022
4    5/5/2022
5    5/6/2022
Name: day, dtype: object

In [19]:
# or as accessing property in dictionary
df['day']

0    5/1/2022
1    5/2/2022
2    5/3/2022
3    5/4/2022
4    5/5/2022
5    5/6/2022
Name: day, dtype: object

In [20]:
#Print some of the columns
df[['event', 'day']]

Unnamed: 0,event,day
0,Rain,5/1/2022
1,Sunny,5/2/2022
2,Snow,5/3/2022
3,Snow,5/4/2022
4,Rain,5/5/2022
5,Sunny,5/6/2022


## Types

In [21]:
type(df['event'])

pandas.core.series.Series

## Operations on Dataframes

In [22]:
df

Unnamed: 0,day,temperature,windspeed,event
0,5/1/2022,44,6,Rain
1,5/2/2022,45,7,Sunny
2,5/3/2022,28,2,Snow
3,5/4/2022,24,7,Snow
4,5/5/2022,33,4,Rain
5,5/6/2022,56,2,Sunny


In [23]:
# Max temperature
df['temperature'].max()

56

In [24]:
# Mean temperature
df['temperature'].mean()

38.333333333333336

In [25]:
# Mean temperature
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,38.333333,4.666667
std,12.077527,2.33809
min,24.0,2.0
25%,29.25,2.5
50%,38.5,5.0
75%,44.75,6.75
max,56.0,7.0


In [26]:
df[df.temperature>=32]

Unnamed: 0,day,temperature,windspeed,event
0,5/1/2022,44,6,Rain
1,5/2/2022,45,7,Sunny
4,5/5/2022,33,4,Rain
5,5/6/2022,56,2,Sunny


In [27]:
df[df.temperature==df.temperature.max()]

Unnamed: 0,day,temperature,windspeed,event
5,5/6/2022,56,2,Sunny


In [28]:
df['day'][df.temperature==df.temperature.max()]

5    5/6/2022
Name: day, dtype: object

In [29]:
df[['day','temperature']][df.temperature==df.temperature.max()]

Unnamed: 0,day,temperature
5,5/6/2022,56


## Indexing

In [30]:
df

Unnamed: 0,day,temperature,windspeed,event
0,5/1/2022,44,6,Rain
1,5/2/2022,45,7,Sunny
2,5/3/2022,28,2,Snow
3,5/4/2022,24,7,Snow
4,5/5/2022,33,4,Rain
5,5/6/2022,56,2,Sunny


In [31]:
df.index

RangeIndex(start=0, stop=6, step=1)

In [32]:
df

Unnamed: 0,day,temperature,windspeed,event
0,5/1/2022,44,6,Rain
1,5/2/2022,45,7,Sunny
2,5/3/2022,28,2,Snow
3,5/4/2022,24,7,Snow
4,5/5/2022,33,4,Rain
5,5/6/2022,56,2,Sunny


In [33]:
df.loc[4]

day            5/5/2022
temperature          33
windspeed             4
event              Rain
Name: 4, dtype: object

In [34]:
df.reset_index()

Unnamed: 0,index,day,temperature,windspeed,event
0,0,5/1/2022,44,6,Rain
1,1,5/2/2022,45,7,Sunny
2,2,5/3/2022,28,2,Snow
3,3,5/4/2022,24,7,Snow
4,4,5/5/2022,33,4,Rain
5,5,5/6/2022,56,2,Sunny


In [35]:
# First Row
df.iloc[0]

day            5/1/2022
temperature          44
windspeed             6
event              Rain
Name: 0, dtype: object

In [36]:
#First 2 rows
df.iloc[0:2]

Unnamed: 0,day,temperature,windspeed,event
0,5/1/2022,44,6,Rain
1,5/2/2022,45,7,Sunny


In [37]:
#specific column
df.iloc[0,2]

6

In [38]:
type(df.iloc[0,2])

numpy.int64

In [39]:
# 1 row, 2 columns
df.iloc[0,2:4]

windspeed       6
event        Rain
Name: 0, dtype: object

In [40]:
type(df.iloc[0,2:4])

pandas.core.series.Series

In [41]:
# 2 row, 2 columns
df.iloc[0:2,2:4]

Unnamed: 0,windspeed,event
0,6,Rain
1,7,Sunny


In [42]:
type(df.iloc[0:2,2:4])

pandas.core.frame.DataFrame

## Create dataframe from file

In [48]:
df2 = pd.read_csv("data/temperatures.csv")

In [49]:
df2

Unnamed: 0,1,5/1/2022,44,6,Rain
0,2,5/2/2022,45,7,Sunny
1,3,5/3/2022,28,2,Snow
2,4,5/4/2022,24,7,Snow
3,5,5/5/2022,33,4,Rain
4,6,5/6/2022,56,2,Sunny
5,7,5/7/2022,40,8,Rain
6,8,5/8/2022,36,0,Fog


In [55]:
# Prevent first line from becoming a header
df2 = pd.read_csv("data/temperatures.csv", header=None)
df2

Unnamed: 0,0,1,2,3,4
0,1,5/1/2022,44,6,Rain
1,2,5/2/2022,45,7,Sunny
2,3,5/3/2022,28,2,Snow
3,4,5/4/2022,24,7,Snow
4,5,5/5/2022,33,4,Rain
5,6,5/6/2022,56,2,Sunny
6,7,5/7/2022,40,8,Rain
7,8,5/8/2022,36,0,Fog


## Column Names and Index Column

In [56]:
df2

Unnamed: 0,0,1,2,3,4
0,1,5/1/2022,44,6,Rain
1,2,5/2/2022,45,7,Sunny
2,3,5/3/2022,28,2,Snow
3,4,5/4/2022,24,7,Snow
4,5,5/5/2022,33,4,Rain
5,6,5/6/2022,56,2,Sunny
6,7,5/7/2022,40,8,Rain
7,8,5/8/2022,36,0,Fog


In [59]:
# Set Column Names
df2.columns = ["id", "day", "temperature", "windspeed", "event"]
df2

Unnamed: 0,id,day,temperature,windspeed,event
0,1,5/1/2022,44,6,Rain
1,2,5/2/2022,45,7,Sunny
2,3,5/3/2022,28,2,Snow
3,4,5/4/2022,24,7,Snow
4,5,5/5/2022,33,4,Rain
5,6,5/6/2022,56,2,Sunny
6,7,5/7/2022,40,8,Rain
7,8,5/8/2022,36,0,Fog


In [60]:
# Set Index Column
df3 = df2.set_index("id")
df3

Unnamed: 0_level_0,day,temperature,windspeed,event
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5/1/2022,44,6,Rain
2,5/2/2022,45,7,Sunny
3,5/3/2022,28,2,Snow
4,5/4/2022,24,7,Snow
5,5/5/2022,33,4,Rain
6,5/6/2022,56,2,Sunny
7,5/7/2022,40,8,Rain
8,5/8/2022,36,0,Fog


## Deleting Rows and Columns

In [64]:
df4 = df3.drop("windspeed", 1)  # 1 means delete column, 0 means delete row
df4

  df4 = df3.drop("windspeed", 1)  # 1 means delete column, 0 means delete row


Unnamed: 0_level_0,day,temperature,event
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,5/1/2022,44,Rain
2,5/2/2022,45,Sunny
3,5/3/2022,28,Snow
4,5/4/2022,24,Snow
5,5/5/2022,33,Rain
6,5/6/2022,56,Sunny
7,5/7/2022,40,Rain
8,5/8/2022,36,Fog


In [66]:
df5 = df4.drop(6,0)
df5

  df5 = df4.drop(6,0)


Unnamed: 0_level_0,day,temperature,event
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,5/1/2022,44,Rain
2,5/2/2022,45,Sunny
3,5/3/2022,28,Snow
4,5/4/2022,24,Snow
5,5/5/2022,33,Rain
7,5/7/2022,40,Rain
8,5/8/2022,36,Fog


## Adding new Columns

In [67]:
# Prevent first line from becoming a header
df = pd.read_csv("data/temperatures.csv", header=None)
df

Unnamed: 0,0,1,2,3,4
0,1,5/1/2022,44,6,Rain
1,2,5/2/2022,45,7,Sunny
2,3,5/3/2022,28,2,Snow
3,4,5/4/2022,24,7,Snow
4,5,5/5/2022,33,4,Rain
5,6,5/6/2022,56,2,Sunny
6,7,5/7/2022,40,8,Rain
7,8,5/8/2022,36,0,Fog


In [68]:
# Set Column Names
df.columns = ["id", "day", "temperature", "windspeed", "event"]
df

Unnamed: 0,id,day,temperature,windspeed,event
0,1,5/1/2022,44,6,Rain
1,2,5/2/2022,45,7,Sunny
2,3,5/3/2022,28,2,Snow
3,4,5/4/2022,24,7,Snow
4,5,5/5/2022,33,4,Rain
5,6,5/6/2022,56,2,Sunny
6,7,5/7/2022,40,8,Rain
7,8,5/8/2022,36,0,Fog


In [70]:
df["humidity"] = [80, 20, 60, 62, 88, 15, 88, 95] 
df

Unnamed: 0,id,day,temperature,windspeed,event,humidity
0,1,5/1/2022,44,6,Rain,80
1,2,5/2/2022,45,7,Sunny,20
2,3,5/3/2022,28,2,Snow,60
3,4,5/4/2022,24,7,Snow,62
4,5,5/5/2022,33,4,Rain,88
5,6,5/6/2022,56,2,Sunny,15
6,7,5/7/2022,40,8,Rain,88
7,8,5/8/2022,36,0,Fog,95


## Add new Rows

In [72]:
# Transpose
df_t = df.T
df_t

Unnamed: 0,0,1,2,3,4,5,6,7
id,1,2,3,4,5,6,7,8
day,5/1/2022,5/2/2022,5/3/2022,5/4/2022,5/5/2022,5/6/2022,5/7/2022,5/8/2022
temperature,44,45,28,24,33,56,40,36
windspeed,6,7,2,7,4,2,8,0
event,Rain,Sunny,Snow,Snow,Rain,Sunny,Rain,Fog
humidity,80,20,60,62,88,15,88,95


In [76]:
# Add column to Transposed frame
df_t[8] = [8, "5/9/2022", 45, 9, "Rain", 85] 
df_t

Unnamed: 0,0,1,2,3,4,5,6,7,8
id,1,2,3,4,5,6,7,8,8
day,5/1/2022,5/2/2022,5/3/2022,5/4/2022,5/5/2022,5/6/2022,5/7/2022,5/8/2022,5/9/2022
temperature,44,45,28,24,33,56,40,36,45
windspeed,6,7,2,7,4,2,8,0,9
event,Rain,Sunny,Snow,Snow,Rain,Sunny,Rain,Fog,Rain
humidity,80,20,60,62,88,15,88,95,85


In [78]:
# Transpose again to get a frame with extra row
df = df_t.T
df

Unnamed: 0,id,day,temperature,windspeed,event,humidity
0,1,5/1/2022,44,6,Rain,80
1,2,5/2/2022,45,7,Sunny,20
2,3,5/3/2022,28,2,Snow,60
3,4,5/4/2022,24,7,Snow,62
4,5,5/5/2022,33,4,Rain,88
5,6,5/6/2022,56,2,Sunny,15
6,7,5/7/2022,40,8,Rain,88
7,8,5/8/2022,36,0,Fog,95
8,8,5/9/2022,45,9,Rain,85
