### Working with CSV Files and Analysing Data with Pandas

In [9]:
# Read the raw file and break it into one string per line.
# splitlines() cuts only at line boundaries, so a field that contains a space
# (e.g. "Partly Cloudy") stays on its own row; split() would cut on any whitespace.
with open('weather_data.csv') as f:
    l = f.read().splitlines()
l

['day,temp,condition',
 'Monday,12,Sunny',
 'Tuesday,14,Rain',
 'Wednesday,15,Rain',
 'Thursday,14,Cloudy',
 'Friday,21,Sunny',
 'Saturday,22,Sunny',
 'Sunday,24,Sunny']

##### Using the csv module to read a Comma-Separated Values file

In [20]:
import csv
# Collect the temperature column (second field of every row) as integers.
# newline='' lets the csv module handle line endings itself, as its docs recommend.
with open('weather_data.csv', newline='') as f:
    reader = csv.reader(f)  # iterating the reader yields each row as a list of strings
    next(reader, None)  # skip the header row by position instead of by comparing its text
    # `if row` ignores blank lines, which the reader returns as empty lists.
    temperatures = [int(row[1]) for row in reader if row]

temperatures

[12, 14, 15, 14, 21, 22, 24]

##### Using pandas to read CSV files, which is much easier

In [23]:
import pandas
# Load the whole CSV into a DataFrame, then show the full table followed by
# just the temperature column.
data = pandas.read_csv('weather_data.csv')
print(data, '\n\n')
print(data.temp)

         day  temp condition
0     Monday    12     Sunny
1    Tuesday    14      Rain
2  Wednesday    15      Rain
3   Thursday    14    Cloudy
4     Friday    21     Sunny
5   Saturday    22     Sunny
6     Sunday    24     Sunny 


0    12
1    14
2    15
3    14
4    21
5    22
6    24
Name: temp, dtype: int64


##### Data Frame vs Data Series
A DataFrame is like a whole table of a database (rows and columns), while a Series is like a list holding the values of one particular column of that table.

In [24]:
# Convert the DataFrame to nested dictionaries: {column name: {row index: value}}.
data.to_dict(orient='dict')

{'day': {0: 'Monday',
  1: 'Tuesday',
  2: 'Wednesday',
  3: 'Thursday',
  4: 'Friday',
  5: 'Saturday',
  6: 'Sunday'},
 'temp': {0: 12, 1: 14, 2: 15, 3: 14, 4: 21, 5: 22, 6: 24},
 'condition': {0: 'Sunny',
  1: 'Rain',
  2: 'Rain',
  3: 'Cloudy',
  4: 'Sunny',
  5: 'Sunny',
  6: 'Sunny'}}

##### Calculations on the temperature column from the CSV file

In [39]:
from statistics import mean
data = pandas.read_csv('weather_data.csv')
temp_column = data['temp']

# Average via the standard library: convert the column to a plain list first.
average_from_list = mean(temp_column.to_list())
print(round(average_from_list, 2))

# The Series' own mean() gives the same result without the conversion.
print(round(temp_column.mean(), 2), '\n')

# Maximum of the column.
print(temp_column.max())

# A column can be read with dictionary-style or attribute-style access.
data['condition'], data.condition


17.43
17.43 

24


(0     Sunny
 1      Rain
 2      Rain
 3    Cloudy
 4     Sunny
 5     Sunny
 6     Sunny
 Name: condition, dtype: object,
 0     Sunny
 1      Rain
 2      Rain
 3    Cloudy
 4     Sunny
 5     Sunny
 6     Sunny
 Name: condition, dtype: object)

In [42]:
# Select a row: build a boolean mask on the day column and filter the frame with it.
is_monday = data['day'] == 'Monday'
print(data[is_monday])

      day  temp condition
0  Monday    12     Sunny


In [44]:
# Row(s) whose temperature equals the column maximum.
highest_temp = data['temp'].max()
print(data.loc[data['temp'] == highest_temp])

      day  temp condition
6  Sunday    24     Sunny


In [51]:
# Monday's temperature converted to Fahrenheit (value * 9/5 + 32).
# The one-row selection is unwrapped with .item() before int(): calling int()
# directly on a single-element Series is deprecated in recent pandas releases.
monday_temp = int(data.loc[data.day == 'Monday', 'temp'].item())
(monday_temp * 9) / 5 + 32

53.6

In [53]:
# Create a DataFrame from scratch: every key becomes a column and its list
# supplies that column's values.
data_dict = dict(
    students=['Harsh', 'Gaurav', 'Kumar'],
    scores=[45, 56, 67],
)

data = pandas.DataFrame(data=data_dict)
print(data)

  students  scores
0    Harsh      45
1   Gaurav      56
2    Kumar      67


In [54]:
# Save the DataFrame to a new CSV file.
# index=True is the default: the row index is written as an unnamed first column.
data.to_csv('student_scores.csv', index=True)

#### Analysing Squirrel Data with Pandas

In [5]:
import pandas

data = pandas.read_csv('2018_Central_Park_Squirrel_Census_-_Squirrel_Data.csv')
# Distinct primary fur colours; the result also includes NaN for rows where
# no colour is recorded.
data.loc[:, 'Primary Fur Color'].unique()

array([nan, 'Gray', 'Cinnamon', 'Black'], dtype=object)

In [21]:
# Count the squirrels of each primary fur colour and save the summary table.
fur_column = data['Primary Fur Color']
colors = ['Gray', 'Cinnamon', 'Black']

data_dict = {
    'Fur Color': colors,
    # Number of rows whose primary fur colour matches each listed colour.
    'Count': [len(data[fur_column == color]) for color in colors],
}

df = pandas.DataFrame(data_dict)
df.to_csv('squirrel_count.csv')