# Pandas 101

In [1]:
import pandas as pd

## Create dataframe

In [39]:
# Build a DataFrame from a list of rows. No labels are supplied, so pandas
# assigns default integer labels to both the rows and the columns.
df = pd.DataFrame(
    data=[
        [110, 2000, 35000],
        [100, 2500, 30000],
    ]
)
df

Unnamed: 0,0,1,2
0,110,2000,35000
1,100,2500,30000


In [40]:
# DataFrame shape: a (number of rows, number of columns) tuple
df.shape

(2, 3)

In [32]:
# Same rows as before, now with column names supplied via `columns`
df = pd.DataFrame(
    data=[
        [110, 2000, 35000],
        [100, 2500, 30000],
    ],
    columns=["Top Speed", "Weight", "Price"],
)
df

Unnamed: 0,Top Speed,Weight,Price
0,110,2000,35000
1,100,2500,30000


In [33]:
# Label the rows as well: `index` supplies one label per row
df = pd.DataFrame(
    data=[
        [110, 2000, 35000],
        [100, 2500, 30000],
    ],
    columns=["Top Speed", "Weight", "Price"],
    index=["Car A", "Car B"],
)
df

Unnamed: 0,Top Speed,Weight,Price
Car A,110,2000,35000
Car B,100,2500,30000


In [35]:
# Print index (the row labels)
df.index

Index(['Car A', 'Car B'], dtype='object')

## Create dataframe from Python Dictionary

In [2]:
# Create from a Python dictionary: each key becomes a column name and
# each list holds that column's values (all lists have the same length).
weather_data = {
    "day": ["1/1/2022", "1/2/2022", "1/3/2022", "1/4/2022", "1/5/2022", "1/6/2022"],
    "temperature": [32, 35, 28, 24, 32, 31],
    "windspeed": [6, 7, 2, 7, 4, 2],
    "event": ["Rain", "Sunny", "Snow", "Snow", "Rain", "Sunny"],
}
df = pd.DataFrame(data=weather_data)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2022,32,6,Rain
1,1/2/2022,35,7,Sunny
2,1/3/2022,28,2,Snow
3,1/4/2022,24,7,Snow
4,1/5/2022,32,4,Rain
5,1/6/2022,31,2,Sunny


## DataFrame size

In [3]:
# (rows, columns) of the weather frame
df.shape

(6, 4)

In [4]:
# Unpack the shape tuple into separate row and column counts
rows, cols = df.shape
print("Rows: ", rows)
print("Cols: ", cols)

Rows:  6
Cols:  4


## Print rows

In [5]:
# First rows of the frame (head() shows 5 rows when no count is given)
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2022,32,6,Rain
1,1/2/2022,35,7,Sunny
2,1/3/2022,28,2,Snow
3,1/4/2022,24,7,Snow
4,1/5/2022,32,4,Rain


In [6]:
# First 2 rows
df.head(2)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2022,32,6,Rain
1,1/2/2022,35,7,Sunny


In [7]:
# Last 2 rows
df.tail(2)

Unnamed: 0,day,temperature,windspeed,event
4,1/5/2022,32,4,Rain
5,1/6/2022,31,2,Sunny


In [8]:
# Rows at positions 2-4 (the slice end, 5, is excluded)
df[2:5]

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2022,28,2,Snow
3,1/4/2022,24,7,Snow
4,1/5/2022,32,4,Rain


In [9]:
# All rows (a slice with no start or end)
df[:]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2022,32,6,Rain
1,1/2/2022,35,7,Sunny
2,1/3/2022,28,2,Snow
3,1/4/2022,24,7,Snow
4,1/5/2022,32,4,Rain
5,1/6/2022,31,2,Sunny


## Print Columns

In [10]:
# Column labels
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [11]:
# Print individual column using attribute-style access
# (only usable when the column name is a valid Python identifier)
df.day

0    1/1/2022
1    1/2/2022
2    1/3/2022
3    1/4/2022
4    1/5/2022
5    1/6/2022
Name: day, dtype: object

In [12]:
# or with dictionary-style key access, which works for any column name
df['day']

0    1/1/2022
1    1/2/2022
2    1/3/2022
3    1/4/2022
4    1/5/2022
5    1/6/2022
Name: day, dtype: object

In [13]:
# Print some of the columns: pass a list of names; the result is a
# DataFrame whose columns appear in the order listed
df[['event', 'day']]

Unnamed: 0,event,day
0,Rain,1/1/2022
1,Sunny,1/2/2022
2,Snow,1/3/2022
3,Snow,1/4/2022
4,Rain,1/5/2022
5,Sunny,1/6/2022


## Types

In [14]:
# A single column selected by name is a pandas Series
type(df['event'])

pandas.core.series.Series

## Operations on Dataframes

In [15]:
# Show the full weather frame again for reference
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2022,32,6,Rain
1,1/2/2022,35,7,Sunny
2,1/3/2022,28,2,Snow
3,1/4/2022,24,7,Snow
4,1/5/2022,32,4,Rain
5,1/6/2022,31,2,Sunny


In [16]:
# Max temperature (largest value in the column)
df['temperature'].max()

35

In [17]:
# Mean temperature (arithmetic average of the column)
df['temperature'].mean()

30.333333333333332

In [18]:
# Summary statistics (count, mean, std, min, quartiles, max)
# for the numeric columns
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [19]:
# Boolean-mask filter: keep only the rows where temperature is at least 32
df[df["temperature"] >= 32]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2022,32,6,Rain
1,1/2/2022,35,7,Sunny
4,1/5/2022,32,4,Rain


In [20]:
# Row(s) whose temperature equals the column maximum
df[df["temperature"] == df["temperature"].max()]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2022,35,7,Sunny


In [21]:
# Day(s) with the maximum temperature.
# Select rows and the column in a single .loc call instead of chaining two
# [] lookups: chained indexing is discouraged and breaks down as soon as
# the same expression is used for assignment.
df.loc[df["temperature"] == df["temperature"].max(), "day"]

1    1/2/2022
Name: day, dtype: object

In [22]:
# Day and temperature for the row(s) with the maximum temperature.
# One .loc call takes the row mask and the column list together, avoiding
# the intermediate frame that chained [] lookups would create.
df.loc[df["temperature"] == df["temperature"].max(), ["day", "temperature"]]

Unnamed: 0,day,temperature
1,1/2/2022,35


## Indexing

In [23]:
# Show the frame again; the leftmost column of the display is the index
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2022,32,6,Rain
1,1/2/2022,35,7,Sunny
2,1/3/2022,28,2,Snow
3,1/4/2022,24,7,Snow
4,1/5/2022,32,4,Rain
5,1/6/2022,31,2,Sunny


In [24]:
# With no explicit index, the frame gets a default integer RangeIndex
df.index

RangeIndex(start=0, stop=6, step=1)

In [25]:
# Show the frame once more before the label- and position-based lookups.
# NOTE(review): the saved outputs from this cell onward show 2017 dates,
# while weather_data defines 2022 dates, and execution counts are out of
# order -- restart the kernel and run all cells to refresh the outputs.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [40]:
# Label-based lookup: the row whose index label is 4, returned as a Series
df.loc[4]

day            1/5/2017
temperature          32
windspeed             4
event              Rain
Name: 4, dtype: object

In [27]:
# reset_index() returns a new frame with the old index moved into an
# 'index' column; the result is not assigned here, so df is unchanged
df.reset_index()

Unnamed: 0,index,day,temperature,windspeed,event
0,0,1/1/2017,32,6,Rain
1,1,1/2/2017,35,7,Sunny
2,2,1/3/2017,28,2,Snow
3,3,1/4/2017,24,7,Snow
4,4,1/5/2017,32,4,Rain
5,5,1/6/2017,31,2,Sunny


In [28]:
# First row (position-based lookup), returned as a Series
df.iloc[0]

day            1/1/2017
temperature          32
windspeed             6
event              Rain
Name: 0, dtype: object

In [29]:
# First 2 rows (positions 0 and 1; the slice end is excluded)
df.iloc[0:2]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny


In [30]:
# Single value: row position 0, column position 2 (windspeed)
df.iloc[0,2]

6

In [31]:
# A single cell comes back as a NumPy scalar
type(df.iloc[0,2])

numpy.int64

In [32]:
# 1 row, 2 columns (column positions 2 and 3)
df.iloc[0,2:4]

windspeed       6
event        Rain
Name: 0, dtype: object

In [33]:
# One row across several columns is a Series
type(df.iloc[0,2:4])

pandas.core.series.Series

In [34]:
# 2 rows, 2 columns (row positions 0-1, column positions 2-3)
df.iloc[0:2,2:4]

Unnamed: 0,windspeed,event
0,6,Rain
1,7,Sunny


In [35]:
# Slicing both rows and columns gives a DataFrame
type(df.iloc[0:2,2:4])

pandas.core.frame.DataFrame