# DATA FRAMES BASICS

#### read csv

In [1]:
import pandas as pd
# Load the 2016 Central Park weather CSV. A forward slash is used as the path
# separator: "\d" in a normal string literal is an invalid escape sequence
# (a warning on current Python versions), and a backslash path only resolves on
# Windows, whereas "/" works on every platform.
df = pd.read_csv("data/datasets_1655_4420_weather_data_nyc_centralpark_2016(1).csv")
df

Unnamed: 0,date,maximum temperature,minimum temperature,average temperature,precipitation,snow fall,snow depth
0,1-1-2016,42,34,38.0,0.00,0.0,0
1,2-1-2016,40,32,36.0,0.00,0.0,0
2,3-1-2016,45,35,40.0,0.00,0.0,0
3,4-1-2016,36,14,25.0,0.00,0.0,0
4,5-1-2016,29,11,20.0,0.00,0.0,0
...,...,...,...,...,...,...,...
361,27-12-2016,60,40,50.0,0,0,0
362,28-12-2016,40,34,37.0,0,0,0
363,29-12-2016,46,33,39.5,0.39,0,0
364,30-12-2016,40,33,36.5,0.01,T,0


#### create a data frame from a dictionary

In [74]:
# Column-oriented sample data: every keyword becomes a column name and its list
# supplies that column's values (all lists have the same length).
weatherdata = dict(
    day=['1-1-2016', '1-2-2016', '1-3-2016', '1-4-2016', '1-5-2016', '1-6-2016'],
    temperature=[32, 35, 28, 29, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['rain', 'sunny', 'snow', 'snow', 'rain', 'sunny'],
)

# Build the frame from the mapping; the rows get the default 0..5 integer index.
df = pd.DataFrame(data=weatherdata)
df

Unnamed: 0,day,temperature,windspeed,event
0,1-1-2016,32,6,rain
1,1-2-2016,35,7,sunny
2,1-3-2016,28,2,snow
3,1-4-2016,29,7,snow
4,1-5-2016,32,4,rain
5,1-6-2016,31,2,sunny


In [75]:
# shape is a (number of rows, number of columns) tuple.
df.shape

(6, 4)

*DataFrames are stored in tabular form, so `shape` reports the number of rows and the number of columns.*

##### storing the rows and columns of the dataframe 

In [34]:
# Keep the row count and the column count in their own variables.
rows, columns = len(df.index), len(df.columns)
rows

6

In [35]:
# Number of columns, as unpacked from df.shape in the previous cell.
columns

4

##### printing only the head and the tail

In [36]:
# With no argument, head() shows the first 5 rows.
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,1-1-2016,32,6,rain
1,1-2-2016,35,7,sunny
2,1-3-2016,28,2,snow
3,1-4-2016,29,7,snow
4,1-5-2016,32,4,rain


In [37]:
# Pass a count to choose how many leading rows to show (here the first 3).
df.head(3)

Unnamed: 0,day,temperature,windspeed,event
0,1-1-2016,32,6,rain
1,1-2-2016,35,7,sunny
2,1-3-2016,28,2,snow


In [38]:
# tail(n) shows the last n rows (here the final 2).
df.tail(2)

Unnamed: 0,day,temperature,windspeed,event
4,1-5-2016,32,4,rain
5,1-6-2016,31,2,sunny


###### print rows 2 to 4 (the slice end, 5, is not included)

In [39]:
# Positional slice: rows at positions 2, 3 and 4 (the stop value 5 is excluded).
df.iloc[2:5]

Unnamed: 0,day,temperature,windspeed,event
2,1-3-2016,28,2,snow
3,1-4-2016,29,7,snow
4,1-5-2016,32,4,rain


#### printing the columns

In [40]:
df.columns  # Index holding the column labels; this frame has four columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

##### printing the contents of day

In [41]:
# Attribute-style access to the 'day' column; gives the same Series as df['day'].
df.day

0    1-1-2016
1    1-2-2016
2    1-3-2016
3    1-4-2016
4    1-5-2016
5    1-6-2016
Name: day, dtype: object

In [42]:
# Bracket-style access to the same column; returns the same Series as df.day.
df['day']

0    1-1-2016
1    1-2-2016
2    1-3-2016
3    1-4-2016
4    1-5-2016
5    1-6-2016
Name: day, dtype: object

#### to know the type of the contents of a column

In [45]:
type(df['day'])  # a single column selected from a DataFrame is a pandas Series

pandas.core.series.Series

#### print dataframe with only certain columns

In [47]:
# Keep every row but only the listed columns, in the order given.
df.loc[:, ['event', 'day']]

Unnamed: 0,event,day
0,rain,1-1-2016
1,sunny,1-2-2016
2,snow,1-3-2016
3,snow,1-4-2016
4,rain,1-5-2016
5,sunny,1-6-2016


# DATA FRAME OPERATIONS

In [48]:
# Lowest value in the temperature column.
df['temperature'].min()

28

In [49]:
# Highest value in the temperature column.
df['temperature'].max()

35

In [50]:
# Arithmetic mean of the temperature column.
df['temperature'].mean()

31.166666666666668

###### to get descriptive (summary) statistics for the numerical columns

In [51]:
# Summary table (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe() 

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,31.166667,4.666667
std,2.483277,2.33809
min,28.0,2.0
25%,29.5,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [52]:
# Rows whose temperature is at least 32, selected with a boolean mask.
df.loc[df['temperature'] >= 32]

Unnamed: 0,day,temperature,windspeed,event
0,1-1-2016,32,6,rain
1,1-2-2016,35,7,sunny
4,1-5-2016,32,4,rain


In [53]:
# Row(s) where the temperature equals the column maximum.
df.loc[df['temperature'] == df['temperature'].max()]

Unnamed: 0,day,temperature,windspeed,event
1,1-2-2016,35,7,sunny


In [56]:
# Day label(s) of the hottest reading: the boolean row mask and the 'day' column
# are selected together in one .loc call.
maxtempDay = df.loc[df['temperature'] == df['temperature'].max(), 'day']
print(maxtempDay)

1    1-2-2016
Name: day, dtype: object


In [57]:
# Standard deviation of each numeric column. numeric_only=True is required
# because the frame also holds the string columns 'day' and 'event'; on
# pandas 2.0 and later the default (numeric_only=False) raises a TypeError
# when it reaches them.
df.std(numeric_only=True)

temperature    2.483277
windspeed      2.338090
dtype: float64

# SET_INDEX

The index is the label assigned to each row; by default it is a running number (0, 1, 2, ...) displayed as the leftmost column.

In [71]:
# Row labels of the frame.
# NOTE(review): the saved output under this cell is a frame indexed by 'day',
# not an Index object - it looks stale (execution counts are out of order).
# Re-run the notebook top to bottom to refresh it.
df.index

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1-1-2016,32,6,rain
1-2-2016,35,7,sunny
1-3-2016,28,2,snow
1-4-2016,29,7,snow
1-5-2016,32,4,rain
1-6-2016,31,2,sunny


##### setting the index to a different column

In [76]:
# Make the 'day' column the row index so rows can be looked up by their date
# label with df.loc[...]. set_index returns a new DataFrame, which is rebound
# to df here rather than mutating the existing object with inplace=True.
df = df.set_index('day')

In [73]:
# Label-based lookup: returns the row whose index label is '1-4-2016' as a Series.
# Requires 'day' to be the index (see the set_index cell).
df.loc['1-4-2016']

temperature      29
windspeed         7
event          snow
Name: 1-4-2016, dtype: object

In [77]:
# Restore the default integer index; the current index is moved back into the
# frame as a regular column. The result is rebound to df instead of using
# inplace=True.
df = df.reset_index()