In [1]:
import pandas as pd
# Show which pandas version produced the outputs recorded in this notebook.
print(pd.__version__)


0.22.0


#### Link for documentation of pandas read_csv:
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html#pandas.read_csv

In [118]:
# Load the NYC weather CSV, using the first column as the index and asking
# pandas to parse that index as dates (parse_dates=True targets the index).
data = pd.read_csv('nyc_weather.csv', parse_dates=True, index_col=[0])
# Look up a single day by its date label. Only the last expression of a cell
# is displayed, so this lookup is the cell's visible result.
data.loc['2016-01-02']

Temperature                36
DewPoint                   18
Humidity                   46
Sea Level PressureIn    30.02
VisibilityMiles            10
WindSpeedMPH                7
PrecipitationIn             0
CloudCover                  3
Events                    NaN
WindDirDegrees            275
Name: 2016-01-02 00:00:00, dtype: object

In [147]:
## Creating a DataFrame from a dictionary
# Each key becomes a column name and each list holds that column's values.
weather_data = dict(
    day=['01-07-2017', '01-08-2017'],
    temp=[32, 32],
    windspeed=[6, 7],
)
df = pd.DataFrame(weather_data)
df.head()

Unnamed: 0,day,temp,windspeed
0,01-07-2017,32,6
1,01-08-2017,32,7


In [154]:
# Move the 'day' column into the index. Re-binding `df` to the returned frame
# replaces the inplace=True form and leaves the resulting frame the same.
df = df.set_index('day')
# Label the index 'Date', using the same `.index.name` form that this
# notebook uses for the `data` frame.
df.index.name = 'Date'

In [155]:
# Convert the string index into a DatetimeIndex using an explicit format.
# NOTE(review): '%d-%m-%Y' reads '01-07-2017' as day 01, month 07 (1 July 2017).
# If these strings are meant to be month-first (7 January 2017), the format
# should be '%m-%d-%Y' instead — confirm against the intended dates.
df.index = pd.to_datetime(df.index, format = '%d-%m-%Y')

In [156]:
# Preview the frame to check the index after the datetime conversion.
df.head()

Unnamed: 0_level_0,temp,windspeed
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-07-01,32,6
2017-08-01,32,7


In [93]:
# Inspect the class of the index object (the recorded output is DatetimeIndex).
type(df.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [94]:
# df.shape is a (number of rows, number of columns) tuple; keep each part
# under its own name and print them side by side.
rows = df.shape[0]
cols = df.shape[1]
print(rows, cols)

2 2


In [159]:
# Keep a reference to the frame's column labels and print them.
col_names = df.columns
print(col_names)

Index(['temp', 'windspeed'], dtype='object')


In [158]:
# Summarise the frame: index type and range, per-column non-null counts,
# dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2 entries, 2017-07-01 to 2017-08-01
Data columns (total 2 columns):
temp         2 non-null int64
windspeed    2 non-null int64
dtypes: int64(2)
memory usage: 48.0 bytes


In [112]:
# Preview the first rows of the weather data; in the recorded output the
# index is still labelled 'EST'.
data.head()

Unnamed: 0_level_0,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-01-01,38,23,52,30.03,10,8.0,0,5,,281
2016-01-02,36,18,46,30.02,10,7.0,0,3,,275
2016-01-03,40,21,47,29.86,10,8.0,0,1,,277
2016-01-04,25,9,44,30.05,10,9.0,0,3,,345
2016-01-05,20,-3,41,30.57,10,5.0,0,0,,333


In [119]:
# Label the index of the weather data 'Date'.
data.index.name = 'Date'

In [120]:
# Preview again to confirm the index label now reads 'Date'.
data.head()

Unnamed: 0_level_0,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-01-01,38,23,52,30.03,10,8.0,0,5,,281
2016-01-02,36,18,46,30.02,10,7.0,0,3,,275
2016-01-03,40,21,47,29.86,10,8.0,0,1,,277
2016-01-04,25,9,44,30.05,10,9.0,0,3,,345
2016-01-05,20,-3,41,30.57,10,5.0,0,0,,333


In [128]:
# Report the lowest and highest values of the Temperature column.
print('min temp: %d,  max temp: %d' % (
    data['Temperature'].min(),
    data['Temperature'].max(),
))

min temp: 20,  max temp: 50


In [133]:
# Ten highest temperatures: sort the column from largest to smallest, then
# keep the first ten rows.
data['Temperature'].sort_values(ascending=False).head(10)

Date
2016-01-10    50
2016-01-16    47
2016-01-31    46
2016-01-09    44
2016-01-15    43
2016-01-26    43
2016-01-27    41
2016-01-03    40
2016-01-07    39
2016-01-08    39
Name: Temperature, dtype: int64

In [134]:
# Get statistical details (count, mean, std, min, quartiles, max) for the
# numeric columns of the dataset.
data.describe()

Unnamed: 0,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,CloudCover,WindDirDegrees
count,31.0,31.0,31.0,31.0,31.0,28.0,31.0,31.0
mean,34.677419,17.83871,51.677419,29.992903,9.193548,6.892857,3.129032,247.129032
std,7.639315,11.378626,11.634395,0.237237,1.939405,2.871821,2.629853,92.308086
min,20.0,-3.0,33.0,29.52,1.0,2.0,0.0,34.0
25%,29.0,10.0,44.5,29.855,9.0,5.0,1.0,238.0
50%,35.0,18.0,50.0,30.01,10.0,6.5,3.0,281.0
75%,39.5,23.0,55.0,30.14,10.0,8.0,4.5,300.0
max,50.0,46.0,78.0,30.57,10.0,16.0,8.0,345.0


In [145]:
# Conditional selection with a single .loc call.
## Syntax: dataframe.loc[<row condition>, [<columns to select>]]
##         FROM TABLE    [WHERE CLAUSE]   [COLUMN SELECTIONS]
# One .loc lookup filters the rows and picks the columns together, instead of
# chaining two separate [] lookups.
temp_32 = data.loc[data.Temperature >= 32, ['DewPoint', 'Humidity']]
# Show the first ten matching rows.
temp_32.head(10)

Unnamed: 0_level_0,DewPoint,Humidity
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-01,23,52
2016-01-02,18,46
2016-01-03,21,47
2016-01-06,4,35
2016-01-07,11,33
2016-01-08,29,64
2016-01-09,38,77
2016-01-10,46,71
2016-01-11,8,37
2016-01-12,15,53
