### Data Manipulation in Python

### Reading data from CSV files using csv package

In [3]:
import csv

In [3]:
# For compatibility across multiple platforms
import os
IB = os.environ.get('INSTABASE_URI',None) is not None
open = ib.open if IB else open

In [4]:
# Read Cities.csv data into default list format and print all rows
# Make sure data file is in same folder as notebook
# Note all values are read as strings
with open('Cities.csv',newline='') as f:
    rows = csv.reader(f)
    for r in rows:
        print(r)

['city', 'country', 'latitude', 'longitude', 'temperature']
['Aalborg', 'Denmark', '57.03', '9.92', '7.52']
['Aberdeen', 'United Kingdom', '57.17', '-2.08', '8.10']
['Abisko', 'Sweden', '63.35', '18.83', '0.20']
['Adana', 'Turkey', '36.99', '35.32', '18.67']
['Albacete', 'Spain', '39.00', '-1.87', '12.62']
['Algeciras', 'Spain', '36.13', '-5.47', '17.38']
['Amiens', 'France', '49.90', '2.30', '10.17']
['Amsterdam', 'Netherlands', '52.35', '4.92', '8.93']
['Ancona', 'Italy', '43.60', '13.50', '13.52']
['Andorra', 'Andorra', '42.50', '1.52', '9.60']
['Angers', 'France', '47.48', '-0.53', '10.98']
['Ankara', 'Turkey', '39.93', '32.86', '9.86']
['Antalya', 'Turkey', '36.89', '30.70', '11.88']
['Arad', 'Romania', '46.17', '21.32', '9.32']
['Athens', 'Greece', '37.98', '23.73', '17.41']
['Augsburg', 'Germany', '48.35', '10.90', '4.54']
['Bacau', 'Romania', '46.58', '26.92', '7.51']
['Badajoz', 'Spain', '38.88', '-6.97', '15.61']
['Baia Mare', 'Romania', '47.66', '23.58', '8.87']
['Balti', 'M

In [5]:
# Same as previous except use dictionary format
with open('Cities.csv',newline='') as f:
    rows = csv.DictReader(f)
    for r in rows:
        print(r)

OrderedDict([('city', 'Aalborg'), ('country', 'Denmark'), ('latitude', '57.03'), ('longitude', '9.92'), ('temperature', '7.52')])
OrderedDict([('city', 'Aberdeen'), ('country', 'United Kingdom'), ('latitude', '57.17'), ('longitude', '-2.08'), ('temperature', '8.10')])
OrderedDict([('city', 'Abisko'), ('country', 'Sweden'), ('latitude', '63.35'), ('longitude', '18.83'), ('temperature', '0.20')])
OrderedDict([('city', 'Adana'), ('country', 'Turkey'), ('latitude', '36.99'), ('longitude', '35.32'), ('temperature', '18.67')])
OrderedDict([('city', 'Albacete'), ('country', 'Spain'), ('latitude', '39.00'), ('longitude', '-1.87'), ('temperature', '12.62')])
OrderedDict([('city', 'Algeciras'), ('country', 'Spain'), ('latitude', '36.13'), ('longitude', '-5.47'), ('temperature', '17.38')])
OrderedDict([('city', 'Amiens'), ('country', 'France'), ('latitude', '49.90'), ('longitude', '2.30'), ('temperature', '10.17')])
OrderedDict([('city', 'Amsterdam'), ('country', 'Netherlands'), ('latitude', '52.

In [6]:
# Print the city and longitude of all cities with longitude < 0
# Use dictionary format
with open('Cities.csv',newline='') as f:
    rows = csv.DictReader(f)
    for r in rows:
        if float(r['longitude']) < 0:
            print(r['city'], r['longitude'])
# Show what happens without float()

Aberdeen -2.08
Albacete -1.87
Algeciras -5.47
Angers -0.53
Badajoz -6.97
Belfast -5.96
Bilbao -2.93
Birmingham -1.92
Blackpool -3.05
Bordeaux -0.60
Bournemouth -1.90
Bradford -1.75
Braga -8.42
Brest -4.50
Burgos -3.68
Caen -0.35
Cartagena -0.98
Cork -8.50
Dublin -6.25
Dundee -3.00
Edinburgh -3.22
Exeter -3.53
Galway -9.05
Glasgow -4.25
Granada -3.59
Huelva -6.93
Inverness -4.23
Lisbon -9.14
Madrid -3.68
Marbella -4.88
Murcia -1.13
Oviedo -5.83
Salamanca -5.67
Santander -3.80
Swansea -3.95
Valencia -0.40
Vigo -8.73
Zaragoza -0.89


In [7]:
# Same but using list format
with open('Cities.csv',newline ='') as f:
    rows = csv.reader(f)
    next(rows) # discard header row
    for r in rows:
        if float(r[3]) < 0:
            print(r[0], r[3])
# Show what happens without next(rows)

Aberdeen -2.08
Albacete -1.87
Algeciras -5.47
Angers -0.53
Badajoz -6.97
Belfast -5.96
Bilbao -2.93
Birmingham -1.92
Blackpool -3.05
Bordeaux -0.60
Bournemouth -1.90
Bradford -1.75
Braga -8.42
Brest -4.50
Burgos -3.68
Caen -0.35
Cartagena -0.98
Cork -8.50
Dublin -6.25
Dundee -3.00
Edinburgh -3.22
Exeter -3.53
Galway -9.05
Glasgow -4.25
Granada -3.59
Huelva -6.93
Inverness -4.23
Lisbon -9.14
Madrid -3.68
Marbella -4.88
Murcia -1.13
Oviedo -5.83
Salamanca -5.67
Santander -3.80
Swansea -3.95
Valencia -0.40
Vigo -8.73
Zaragoza -0.89


### <font color="green">Your Turn</font>

In [8]:
# Using Countries.csv and reading in dictionary format, find
# all countries that have coastline and are not in the EU.
# Print the countries and their populations.
with open('Countries.csv',newline='') as f:
    rows = csv.DictReader(f)
    for r in rows:
        if r['coastline'] == "no" and r['EU'] == "no":
            print(r['country'], r['population'])

Andorra 0.07
Belarus 9.48
Kosovo 1.91
Liechtenstein 0.04
Macedonia 2.08
Moldova 4.06
Serbia 8.81
Switzerland 8.38


### Reading data into Python data structures

In [9]:
# Read Cities.csv data into list of dictionaries
cities = []
with open('Cities.csv',newline ='') as f:
    rows = csv.DictReader(f)
    for r in rows:
        cities.append(r)
    print(cities)

[OrderedDict([('city', 'Aalborg'), ('country', 'Denmark'), ('latitude', '57.03'), ('longitude', '9.92'), ('temperature', '7.52')]), OrderedDict([('city', 'Aberdeen'), ('country', 'United Kingdom'), ('latitude', '57.17'), ('longitude', '-2.08'), ('temperature', '8.10')]), OrderedDict([('city', 'Abisko'), ('country', 'Sweden'), ('latitude', '63.35'), ('longitude', '18.83'), ('temperature', '0.20')]), OrderedDict([('city', 'Adana'), ('country', 'Turkey'), ('latitude', '36.99'), ('longitude', '35.32'), ('temperature', '18.67')]), OrderedDict([('city', 'Albacete'), ('country', 'Spain'), ('latitude', '39.00'), ('longitude', '-1.87'), ('temperature', '12.62')]), OrderedDict([('city', 'Algeciras'), ('country', 'Spain'), ('latitude', '36.13'), ('longitude', '-5.47'), ('temperature', '17.38')]), OrderedDict([('city', 'Amiens'), ('country', 'France'), ('latitude', '49.90'), ('longitude', '2.30'), ('temperature', '10.17')]), OrderedDict([('city', 'Amsterdam'), ('country', 'Netherlands'), ('latitud

In [10]:
# Print the city and longitude of all cities with longitude < 0
for city in cities:
    if float(city['longitude']) < 0:
        print(city['city'], city['longitude'])

Aberdeen -2.08
Albacete -1.87
Algeciras -5.47
Angers -0.53
Badajoz -6.97
Belfast -5.96
Bilbao -2.93
Birmingham -1.92
Blackpool -3.05
Bordeaux -0.60
Bournemouth -1.90
Bradford -1.75
Braga -8.42
Brest -4.50
Burgos -3.68
Caen -0.35
Cartagena -0.98
Cork -8.50
Dublin -6.25
Dundee -3.00
Edinburgh -3.22
Exeter -3.53
Galway -9.05
Glasgow -4.25
Granada -3.59
Huelva -6.93
Inverness -4.23
Lisbon -9.14
Madrid -3.68
Marbella -4.88
Murcia -1.13
Oviedo -5.83
Salamanca -5.67
Santander -3.80
Swansea -3.95
Valencia -0.40
Vigo -8.73
Zaragoza -0.89


In [11]:
# Print each city and whether in EU
# Must join cities with countries
# First read Countries.csv data int list of dictionaries
countries = []
with open('Countries.csv',newline ='') as f:
    rows = csv.DictReader(f)
    for r in rows:
        countries.append(r)
print(countries)

[OrderedDict([('country', 'Albania'), ('population', '2.9'), ('EU', 'no'), ('coastline', 'yes')]), OrderedDict([('country', 'Andorra'), ('population', '0.07'), ('EU', 'no'), ('coastline', 'no')]), OrderedDict([('country', 'Austria'), ('population', '8.57'), ('EU', 'yes'), ('coastline', 'no')]), OrderedDict([('country', 'Belarus'), ('population', '9.48'), ('EU', 'no'), ('coastline', 'no')]), OrderedDict([('country', 'Belgium'), ('population', '11.37'), ('EU', 'yes'), ('coastline', 'yes')]), OrderedDict([('country', 'Bosnia and Herzegovina'), ('population', '3.8'), ('EU', 'no'), ('coastline', 'yes')]), OrderedDict([('country', 'Bulgaria'), ('population', '7.1'), ('EU', 'yes'), ('coastline', 'yes')]), OrderedDict([('country', 'Croatia'), ('population', '4.23'), ('EU', 'yes'), ('coastline', 'yes')]), OrderedDict([('country', 'Cyprus'), ('population', '1.18'), ('EU', 'yes'), ('coastline', 'yes')]), OrderedDict([('country', 'Czech Republic'), ('population', '10.55'), ('EU', 'yes'), ('coastli

In [12]:
# Now perform join
for city in cities:
    for country in countries:
        if city['country'] == country['country']:
            print(city['city'], country['EU'])
# add 'break' command to for-loop
# swap cities and countries

Aalborg yes
Aberdeen yes
Abisko yes
Adana no
Albacete yes
Algeciras yes
Amiens yes
Amsterdam yes
Ancona yes
Andorra no
Angers yes
Ankara no
Antalya no
Arad yes
Athens yes
Augsburg yes
Bacau yes
Badajoz yes
Baia Mare yes
Balti no
Barcelona yes
Bari yes
Basel no
Batman no
Belfast yes
Belgrade no
Bergamo yes
Bergen no
Berlin yes
Bialystok yes
Bielefeld yes
Bila Tserkva no
Bilbao yes
Birmingham yes
Blackpool yes
Bodo no
Bologna yes
Bonn yes
Bordeaux yes
Botosani yes
Bournemouth yes
Bradford yes
Braga yes
Braila yes
Bratislava yes
Bremen yes
Brest yes
Brest no
Brno yes
Brugge yes
Bucharest yes
Budapest yes
Burgas yes
Burgos yes
Bursa no
Bydgoszcz yes
Bytom yes
Caen yes
Cambridge yes
Cartagena yes
Catania yes
Chemnitz yes
Cherkasy no
Chernihiv no
Chernivtsi no
Chisinau no
Constanta yes
Cork yes
Cosenza yes
Craiova yes
Daugavpils yes
Debrecen yes
Denizli no
Dijon yes
Dublin yes
Dundee yes
Edinburgh yes
Edirne no
Elbasan no
Elblag yes
Erfurt yes
Erzincan no
Erzurum no
Eskisehir no
Exeter yes
F

In [13]:
# Compute overall average city temperature
temps = [] # create list of all temperatures
for city in cities:
    temps.append(float(city['temperature'])) 
print(sum(temps)/len(temps))

9.497840375586858


### Computing average directly using NumPy package

In [14]:
import numpy as np

In [15]:
# Compute overall average city temperature - now using np.average
temps = [] # create list of all temperatures
for city in cities:
    temps.append(float(city['temperature'])) 
print(np.average(temps))

9.497840375586854


In [16]:
# Compute average city temperature for each country
# First compute list of countries
countryList = []
for city in cities:
    if city['country'] not in countryList:
        countryList.append(city['country'])
# Then compute average temperature for each
for country in countryList:
    temps = []
    for city in cities:
        if city['country'] == country:
            temps.append(float(city['temperature']))
    print(country, np.average(temps))

Denmark 7.625
United Kingdom 8.649999999999999
Sweden 3.5866666666666673
Turkey 11.726666666666667
Spain 14.238333333333332
France 10.151111111111112
Netherlands 8.756666666666668
Italy 13.474666666666668
Andorra 9.6
Romania 9.224444444444444
Greece 16.9025
Germany 7.8692857142857155
Moldova 8.415
Switzerland 7.253333333333333
Serbia 9.85
Norway 3.7260000000000004
Poland 7.25
Ukraine 7.420000000000001
Portugal 14.469999999999999
Slovakia 8.48
Belarus 5.946666666666666
Czech Republic 7.8566666666666665
Belgium 9.65
Hungary 9.6025
Bulgaria 10.44
Ireland 9.299999999999999
Latvia 5.27
Albania 15.18
Austria 6.144
Finland 3.4875
Lithuania 6.1433333333333335
Slovenia 9.27
Montenegro 9.99
Croatia 10.865
Bosnia and Herzegovina 9.6
Macedonia 9.36
Estonia 4.59


In [17]:
# Or use Countries data
for country in countries:
    temps = []
    for city in cities:
        if city['country'] == country['country']:
            temps.append(float(city['temperature']))
    if len(temps) > 0:
        print(country['country'], np.average(temps))
# Add else: print country['country'], '- NO CITIES'

Albania 15.18
Andorra 9.6
Austria 6.144
Belarus 5.946666666666666
Belgium 9.65
Bosnia and Herzegovina 9.6
Bulgaria 10.44
Croatia 10.865
Czech Republic 7.8566666666666665
Denmark 7.625
Estonia 4.59
Finland 3.4875
France 10.151111111111112
Germany 7.8692857142857155
Greece 16.9025
Hungary 9.6025
Ireland 9.299999999999999
Italy 13.474666666666668
Latvia 5.27
Lithuania 6.1433333333333335
Macedonia 9.36
Moldova 8.415
Montenegro 9.99
Netherlands 8.756666666666668
Norway 3.7260000000000004
Poland 7.25
Portugal 14.469999999999999
Romania 9.224444444444444
Serbia 9.85
Slovakia 8.48
Slovenia 9.27
Spain 14.238333333333332
Sweden 3.5866666666666673
Switzerland 7.253333333333333
Turkey 11.726666666666667
Ukraine 7.420000000000001
United Kingdom 8.649999999999999


### <font color="green">Your Turn</font>

In [18]:
# Determine the average temperature for EU cities and the average
# temperature for non-EU cities, before and after "Brexit". That is,
# for one pair of averages assume the United Kingdom is in the EU,
# and for the other pair assume the United Kingdom is not in the EU.
# Print the four numbers and make sure to label which is which!
#
# Recommended data structures:
#
UK = [] # temperatures of cities in the United Kingdom
EU = [] # temperatures of cities in an EU country other than the United Kingdom
nonEU = [] # temperatures of cities in a non-EU country other than the UK
#
# Hint: Start with code in earlier example for joining cities and countries
# Hint: Remember you can combine two lists using "+"
# Note: Less than 10 lines of code are needed, not counting printing, and
# assuming cities and countries data has already been loaded into lists.
for country in countries:
    for city in cities:
        if city['country'] == country['country']:
            if city['country'] == 'United Kingdom':
                UK.append(float(city['temperature']))
            elif country['EU'] == 'yes':
                EU.append(float(city['temperature']))
            else:
                nonEU.append(float(city['temperature']))
print('EU before Brexit:', np.average(EU + UK))
print('Non-EU before Brexit:', np.average(nonEU))
print('EU after Brexit:', np.average(EU))
print('Non-EU after Brexit:', np.average(nonEU + UK))

EU before Brexit: 9.694133333333333
Non-EU before Brexit: 9.03047619047619
EU after Brexit: 9.793211678832117
Non-EU after Brexit: 8.965394736842107


### Minimum and maximum

In [19]:
# Overall minimum and maximum temperatures
temps = [] # create list of all temperatures
for city in cities:
    temps.append(float(city['temperature'])) 
print('Minimum:', min(temps))
print('Maximum:', max(temps))

Minimum: -2.2
Maximum: 18.67


In [20]:
# Alternative method
minval = 100.00 # greater than any possible minimum
maxval = -100.00 # smaller than any possible maximum
for city in cities:
    if float(city['temperature']) < minval:
        minval = float(city['temperature'])
    if float(city['temperature']) > maxval:
        maxval = float(city['temperature'])
print('Minimum:', minval)
print('Maximum:', maxval)

Minimum: -2.2
Maximum: 18.67


### <font color="green">Your Turn</font>

In [21]:
# Determine which country has the lowest average city temperature
# and which country has the highest average city temperature.
# Print the two countries and their average temperatures.
# Hint: Start with code above that computes average temperatures
# for each country, then incorporate the running min/max method.
#
minval = 100.00
mincountry = ''
maxval = -100.00
maxcountry = ''
for country in countries:
    temps = []
    for city in cities:
        if city['country'] == country['country']:
            temps.append(float(city['temperature']))
    if len(temps) > 0:
        avg = np.average(temps)
        if avg < minval:
            minval = avg
            mincountry = country['country']
        if avg > maxval:
            maxval = avg
            maxcountry = country['country']
print('Minimum average temperature:', mincountry, 'with', minval)
print('Maximum average temperature:', maxcountry, 'with', maxval)

Minimum average temperature: Finland with 3.4875
Maximum average temperature: Greece with 16.9025


### <font color="green">Your Turn: World Cup Data</font>

In [22]:
# Read Players.csv and Teams.csv into lists of dictionaries
players = []
with open('Players.csv',newline ='') as f:
    rows = csv.DictReader(f)
    for r in rows:
        players.append(r)
teams = []
with open('Teams.csv',newline ='') as f:
    rows = csv.DictReader(f)
    for r in rows:
        teams.append(r)

In [23]:
# What player on a team with “ia” in the team name played less than
# 200 minutes and made more than 100 passes? Print the player surname.
# Note: In Python, use "'abc' in s" to check whether string s contains'abc'
# Reminder: Convert minutes and passes to integers before comparing to values
for player in players:
    if 'ia' in player['team'] and int(player['minutes']) < 200 and\
        int(player['passes']) > 100:
            print(player['surname'])

Kuzmanovic


In [24]:
# What is the average number of passes made by forwards? By midfielders?
# Make sure to label which is which.
forwards = []
midfielders = []
for player in players:
    if player['position'] == 'forward':
        forwards.append(int(player['passes']))
    elif player['position'] == 'midfielder':
        midfielders.append(int(player['passes']))
print('Average passes by forwards:', np.average(forwards))
print('Average passes by midfielders:', np.average(midfielders))

Average passes by forwards: 50.82517482517483
Average passes by midfielders: 95.2719298245614


In [25]:
# Which team has the highest ratio of goalsFor to goalsAgainst?
# Print the team only.
# Reminder: Use float() to make sure you're doing floating point division
# Hint: Use two variables to keep track of highest ratio seen so far
# and team with that ratio:
ratio = 0 # highest ratio seen so far
ratioteam = '' # team with highest ratio

for team in teams:
    nextratio = float(team['goalsFor'])/float(team['goalsAgainst'])
    if nextratio > ratio:
        ratio = nextratio
        ratioteam = team['team']
print(ratioteam)

Portugal


In [26]:
# How many players who play on a team with ranking <10 played
# more than 350 minutes?
# Reminder: Convert ranking and minutes to integers before comparing to values
# Hint: Compute join of players and teams, using a variable to count number of
# players satisfying requirement
count = 0
for player in players:
    for team in teams:
        if player['team'] == team['team']:
            if int(team['ranking']) < 10 and int(player['minutes']) > 350:
                count += 1
print(count)

54


In [27]:
# BONUS!
# Write a loop that interactively asks the user to enter a team name.
# If the team exists, print how many games the team played, how many
# yellow cards and red cards the team had, and the average number of
# minutes played by players on that team.
# If the team doesn't exist, print "Team not in 2010 World Cup".
# If 'quit' is entered, terminate the loop.
# Note: To read a string from the user instead of a number, use
# raw_input() instead of input()
info = {} # dictionary with key = team and value = (games,yellowCards,redCards)
for team in teams:
    info[team['team']] = (team['games'],team['yellowCards'],team['redCards'])
minutes = {} # dictionary with key = team and value = list of minutes
for player in players:
    if player['team'] in minutes:
        minutes[player['team']].append(float(player['minutes']))
    else:
        minutes[player['team']] = [float(player['minutes'])]
while True:
    inputteam = input('Enter a team, or enter quit to stop: ')
    if inputteam == 'quit': break
    elif inputteam not in info:
        print('Team not in 2010 World Cup')
    else:
        print("Games:", info[inputteam][0])
        print("Yellow cards:", info[inputteam][1])
        print("Red cards:", info[inputteam][2])
        print("Average minutes per player:", np.average(minutes[inputteam]))

Enter a team, or enter quit to stop: Portugal
Games: 4
Yellow cards: 8
Red cards: 1
Average minutes per player: 197.95
Enter a team, or enter quit to stop: quit


### <font color="purple">More practice on numpy</font>
Quick overview on numpy library
https://docs.scipy.org/doc/numpy/user/quickstart.html

In [28]:
# Create a null vector of size 10 but the fifth value which is 1
x = np.zeros(10)
x[4] = 1
print(x)

[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]


In [29]:
# Create a vector with values ranging from 10 to 49
x = np.arange(10, 50)
print(x)

[10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49]


In [30]:
# Create a 3x3 matrix with values ranging from 0 to 8
x = np.arange(9).reshape(3,3)
print(x)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [31]:
# Create a 10x10 array with random values and find the minimum, maximum and mean values
x = np.random.random((10,10))
minx = x.min()
maxx = x.max()
meanx = x.mean()
print(minx, maxx, meanx)

0.020985190492497807 0.9942336058438827 0.5294564734901163


In [32]:
# Multiply a 5x3 matrix by a 3x2 matrix (real matrix product)
x = np.dot(np.random.random((5,3)), np.random.random((3,2)))
print(x)

[[0.54493094 0.59146137]
 [0.65304651 0.45444535]
 [0.47827482 0.32356791]
 [0.61841946 0.45291906]
 [0.92859425 0.84492914]]


In [33]:
# Create a random vector of size 10 and sort it 
x = np.random.random(10)
x = np.sort(x)
print(x)

[0.08774062 0.09041016 0.10517501 0.32881589 0.34408192 0.47185145
 0.51371087 0.59729658 0.9079778  0.92551577]


In [34]:
# Extract all odd numbers from arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]). Desired output: array([1, 3, 5, 7, 9])
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[arr % 2 == 1]

array([1, 3, 5, 7, 9])

In [35]:
# Get the positions where elements of a = np.array([1,2,3,2,3,4,3,4,5,6]) and b = np.array([7,2,10,2,7,4,9,4,9,8]) match. Desired output: array([1, 3, 5, 7])
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
np.where(a == b)

(array([1, 3, 5, 7], dtype=int64),)

In [36]:
# Get all items between 5 and 10 from a = np.array([2, 6, 1, 9, 10, 3, 27]). Desired Output: array([6, 9, 10]).
a = np.array([2, 6, 1, 9, 10, 3, 27])
index = np.where((a >= 5) & (a <= 10))
a[index]

array([ 6,  9, 10])

In [37]:
# Get 5 evenly spaced numbers over the interval [3,33]. Desired output [ 3.  10.5 18.  25.5 33. ]
x = np.linspace(3, 33, num = 5)
print(x)

[ 3.  10.5 18.  25.5 33. ]


Additional questions to play 
https://www.machinelearningplus.com/python/101-numpy-exercises-python/

### <font color="green">Your Turn: Arctic Data</font>

In [4]:
arctic_data = []
with open('ArcticData.csv',newline ='') as f:
    rows = csv.DictReader(f)
    for r in rows:
        arctic_data.append(r)
        print(r)

OrderedDict([('Month', '1'), ('Day', '1'), ("1980's Average", '13838002'), ("1990's Average", '13479932'), ("2000's Average", '12884425'), ('2002', '-9999'), ('2003', '13142332'), ('2004', '13009467'), ('2005', '12573869'), ('2006', '12642784'), ('2007', '12629637'), ('2008', '12637938'), ('2009', '12555408'), ('2010', '12548660'), ('2011', '12278751'), ('2012', '12746295'), ('2013', '12362561'), ('2014', '12420072'), ('2015', '12504555'), ('2016', '12234960'), ('2017', '12074998'), ('2018', '11945173'), ('2019', '12287194')])
OrderedDict([('Month', '1'), ('Day', '2'), ("1980's Average", '13894654'), ("1990's Average", '13507159'), ("2000's Average", '12933153'), ('2002', '-9999'), ('2003', '13213231'), ('2004', '13030016'), ('2005', '12624615'), ('2006', '12689716'), ('2007', '12669943'), ('2008', '12630839'), ('2009', '12610057'), ('2010', '12572785'), ('2011', '12350377'), ('2012', '12786647'), ('2013', '12327112'), ('2014', '12505424'), ('2015', '12469040'), ('2016', '12237239'), (

OrderedDict([('Month', '12'), ('Day', '5'), ("1980's Average", '12436576'), ("1990's Average", '11961229'), ("2000's Average", '11278309'), ('2002', '11461042'), ('2003', '11265694'), ('2004', '11522508'), ('2005', '10996110'), ('2006', '10637874'), ('2007', '10790876'), ('2008', '11262074'), ('2009', '11105626'), ('2010', '10652221'), ('2011', '10851780'), ('2012', '10826504'), ('2013', '11178473'), ('2014', '11195344'), ('2015', '10780081'), ('2016', '10046572'), ('2017', '10626628'), ('2018', '10641797'), ('2019', '-9999')])
OrderedDict([('Month', '12'), ('Day', '6'), ("1980's Average", '12496564'), ("1990's Average", '12029964'), ("2000's Average", '11347817'), ('2002', '11483087'), ('2003', '11341579'), ('2004', '11594020'), ('2005', '11095587'), ('2006', '10758663'), ('2007', '10898634'), ('2008', '11357204'), ('2009', '11091212'), ('2010', '10759069'), ('2011', '10999805'), ('2012', '10927106'), ('2013', '11185253'), ('2014', '11240974'), ('2015', '10856237'), ('2016', '10084594

In [39]:
# For the entire data set, find the maximum and minimum values
entire = []
with open('ArcticData.csv',newline='') as f:
    rows = csv.DictReader(f)
    for r in rows:
        del r['Month']
        del r['Day']
        for x in r.values():
            entire.append(int(x))
print("Maximum: ", max(entire))
print("Minimum: ", min(entire))

Maximum:  15602024
Minimum:  -9999


In [40]:
# Find the maximum and minimum values for 2012
y2012 = []
with open('ArcticData.csv',newline='') as f:
    rows = csv.DictReader(f)
    for r in rows:
        y2012.append(int(r['2012']))
print("Maximum for 2012: ", max(y2012))
print("Minimum for 2012: ", min(y2012))

Maximum for 2012:  14709086
Minimum for 2012:  3177455


In [41]:
# Find the maximum and the minimum in January of 2005.
Jan2005 = []
with open('ArcticData.csv',newline='') as f:
    rows = csv.DictReader(f)
    for r in rows:
        if int(r['Month']) == 1:
            Jan2005.append(int(r['2005']))
print("Maximum for January 2005: ", max(Jan2005))
print("Minimum for January 2005: ", min(Jan2005))

Maximum for January 2005:  13601280
Minimum for January 2005:  12573869


In [5]:
# Find the decadal average for each day in June from 2002 to 2012.
for val in arctic_data:
    if int(val['Month']) == 6:
        avg = 0
        count = 0
        for y in range(2002, 2013):
            avg += int(val[str(y)])
            count += 1
        avg = avg/count
        print(val['Day'], avg)

1 10383623.0
2 10341862.272727273
3 10298238.0
4 10259105.363636363
5 10207117.909090908
6 10147670.272727273
7 10090529.272727273
8 10029252.454545455
9 9979887.0
10 9934902.909090908
11 9895790.090909092
12 9846698.363636363
13 9794329.181818182
14 9737270.181818182
15 9680397.727272727
16 9621399.0
17 9563643.272727273
18 9497755.727272727
19 9433003.545454545
20 9367673.727272727
21 10292100.545454545
22 10230411.636363637
23 10162437.181818182
24 10079229.727272727
25 10000000.727272727
26 9921598.454545455
27 9853740.454545455
28 9776817.090909092
29 9697612.090909092
30 9623745.909090908


In [43]:
# Find the average values for each month in 2011.
months = {}

for m in range(1, 13):
    months[str(m)] = {'days': 0, 'sum':0}
    
for val in arctic_data:
    months[val['Month']]['days'] += 1
    months[val['Month']]['sum'] += float(val['2011'])
    
for m in range(1, 13):
    avg = months[str(m)]['sum']/months[str(m)]['days']
    print(m, avg)

1 12895995.290322581
2 13315792.0
3 13978389.741935484
4 13500023.266666668
5 12023642.870967742
6 10143039.533333333
7 7448535.838709678
8 5356691.645161291
9 4469271.766666667
10 6188102.612903226
11 9397431.8
12 11635959.903225806
