## DataFrame
A DataFrame is the two-dimensional, tabular data structure in pandas: data is organized into labeled rows and columns.

In [19]:
import pandas as pd

# Load the comma-separated weather records from disk into a DataFrame
csv_path = "weather.csv"
df = pd.read_csv(csv_path)
df

Unnamed: 0,date,maximum temperature,minimum temperature,average temperature,precipitation,snow fall,snow depth
0,1-1-2016,42,34,38.0,0.0,0.0,0.0
1,2-1-2016,40,32,36.0,0.0,0.0,0.0
2,3-1-2016,45,35,40.0,0.0,0.0,0.0
3,4-1-2016,36,14,25.0,0.0,0.0,0.0
4,5-1-2016,29,11,20.0,0.0,0.0,0.0
5,6-1-2016,41,25,33.0,0.0,0.0,0.0
6,7-1-2016,46,31,38.5,0.0,0.0,0.0
7,8-1-2016,46,31,38.5,0.0,0.0,0.0
8,9-1-2016,47,40,43.5,0.0,0.0,0.0
9,10-1-2016,59,40,49.5,1.8,0.0,0.0


### Construct a DataFrame from a list of tuples

In [20]:
# One tuple per day, in the order: date, min temp, max temp, avg temp
weather_data = [
    ('6/5/2019', 29, 38, 32),
    ('7/5/2019', 23, 42, 38),
    ('8/5/2019', 20, 33, 28),
    ('9/5/2019', 22, 35, 30),
]
# Column labels are supplied separately because the tuples carry values only
column_names = ['date', 'min temp', 'max temp', 'avg temp']
df = pd.DataFrame(weather_data, columns=column_names)
df

Unnamed: 0,date,min temp,max temp,avg temp
0,6/5/2019,29,38,32
1,7/5/2019,23,42,38
2,8/5/2019,20,33,28
3,9/5/2019,22,35,30


In [None]:
# Get the dimensions of the DataFrame as a (number of rows, number of columns) tuple
df.shape

In [5]:
# Show the first 3 rows; head() returns 5 rows when no count is given
df.head(n=3)

Unnamed: 0,date,min temp,max temp,avg temp
0,6/5/2019,29,38,32
1,7/5/2019,23,42,38
2,8/5/2019,20,33,28


In [6]:
# Show the last 3 rows; tail() returns 5 rows when no count is given
df.tail(n=3)

Unnamed: 0,date,min temp,max temp,avg temp
1,7/5/2019,23,42,38
2,8/5/2019,20,33,28
3,9/5/2019,22,35,30


In [7]:
# Get the column labels (returned as a pandas Index)
df.columns

Index(['date', 'min temp', 'max temp', 'avg temp'], dtype='object')

In [10]:
# Row slicing df[m:n]: fetch the rows at positions m through n-1 (n is exclusive)
df[2:4]

Unnamed: 0,date,min temp,max temp,avg temp
2,8/5/2019,20,33,28
3,9/5/2019,22,35,30


In [11]:
# Access a column with attribute syntax (returns a Series).
# Column names containing spaces, such as 'avg temp', cannot be reached this way.
df.date

0    6/5/2019
1    7/5/2019
2    8/5/2019
3    9/5/2019
Name: date, dtype: object

In [12]:
# Access a column with bracket syntax; this form also works for names that contain spaces
df["date"]

0    6/5/2019
1    7/5/2019
2    8/5/2019
3    9/5/2019
Name: date, dtype: object

In [13]:
# Select a subset of columns by passing a list of column names
selected_columns = ['date', 'avg temp']
df[selected_columns]

Unnamed: 0,date,avg temp
0,6/5/2019,32
1,7/5/2019,38
2,8/5/2019,28
3,9/5/2019,30


In [15]:
# Summary statistics (count, mean, std, min, quartiles, max) for one column
avg_temp_column = df["avg temp"]
avg_temp_column.describe()

count     4.000000
mean     32.000000
std       4.320494
min      28.000000
25%      29.500000
50%      31.000000
75%      33.500000
max      38.000000
Name: avg temp, dtype: float64

In [16]:
# Keep only the row(s) whose average temperature equals the column maximum
avg_temp = df["avg temp"]
is_hottest = avg_temp == avg_temp.max()
df[is_hottest]

Unnamed: 0,date,min temp,max temp,avg temp
1,7/5/2019,23,42,38


In [18]:
# Find only the date(s) on which the average temperature was at its maximum.
# Rows and column are selected together in a single .loc call instead of
# chaining two separate indexing operations.
is_max_avg = df["avg temp"] == df["avg temp"].max()
df.loc[is_max_avg, "date"]

1    7/5/2019
Name: date, dtype: object