# Python Intro: Data Operations

In this section, you will learn how to import CSV files, load their data into Python data structures, examine the data, and query it.

Required datasets: 
- CityTemps.csv
- Regions.tsv

In [29]:
# Import the CSV python package
import csv

### Importing a CSV file into a list

In [30]:
# Start with an empty list; the temperatures read from the CSV file go in here
all_temps = list()

In [31]:
# Load the 'temp' column of CityTemps.csv into all_temps.
# Mode 'rU': 'r' --> read mode (the file cannot be modified),
# 'U' --> universal newline support (handles files written with different newline characters).
# NOTE: 'U' is a Python 2 idiom; it is deprecated in Python 3 and removed in Python 3.11.
with open('CityTemps.csv', 'rU') as my_temps_file:
    my_temps_reader = csv.DictReader(my_temps_file)
    # Rebuild the list from scratch instead of appending to the existing one,
    # so running this cell more than once does not duplicate every temperature.
    # The csv module yields every value as a string.
    all_temps = [row['temp'] for row in my_temps_reader]

In [32]:
# show every value that was read from the file
print(all_temps)

['44', '38', '35', '31', '47', '42', '15', '22', '26', '30', '45', '65', '58', '37', '22', '19', '21', '11', '22', '27', '45', '12', '25', '23', '21', '2', '24', '8', '13', '11', '27', '24', '14', '27', '34', '31', '0', '26', '21', '28', '33', '24', '24', '38', '31', '24', '49', '44', '18', '7', '32', '33', '19', '9', '13', '14']


In [33]:
# how many temperatures the list holds
print(len(all_temps))

56


In [35]:
# Correct way to cast strings as integers:
# walk the list by position and overwrite each slot with its integer value,
# so the list itself is updated in place
for position, raw_temp in enumerate(all_temps):
    all_temps[position] = int(raw_temp)

In [36]:
# show the list again after the conversion
print(all_temps)

[44, 38, 35, 31, 47, 42, 15, 22, 26, 30, 45, 65, 58, 37, 22, 19, 21, 11, 22, 27, 45, 12, 25, 23, 21, 2, 24, 8, 13, 11, 27, 24, 14, 27, 34, 31, 0, 26, 21, 28, 33, 24, 24, 38, 31, 24, 49, 44, 18, 7, 32, 33, 19, 9, 13, 14]


In [37]:
# total, count, and mean of the temperatures
temps_total = sum(all_temps)
temps_count = len(all_temps)
print(temps_total)
print(temps_count)
# float() forces true division; under Python 2, int / int would drop the fractional part
print(temps_total / float(temps_count))

1485
56
26.5178571429


### Writing to file
Here, we will create and write to a file. Specifically, we will combine the information from two tables, CityTemps and Regions, into one table, so that each city also has a region associated with it.

In [38]:
# Join CityTemps.csv with Regions.tsv on the 'state' column and write the
# combined rows to OurTempsRegions.csv.
attributes = ['city', 'state', 'lat', 'lng', 'temp', 'region', 'coastal']

# Read both inputs before creating the output file, so a missing or unreadable
# input does not leave a header-only OurTempsRegions.csv behind.
cities = []
with open('CityTemps.csv', 'rU') as citytemps_file:
    for row in csv.DictReader(citytemps_file):
        cities.append(row)

regions = []
with open('Regions.tsv', 'rU') as regions_file:
    # Regions.tsv is tab-separated
    for row in csv.DictReader(regions_file, delimiter='\t'):
        regions.append(row)

# Index the regions by state so each city needs a single lookup instead of a
# scan over every region. setdefault keeps the first row seen for a state,
# which is the row a first-match scan of the list would pick.
region_by_state = {}
for region in regions:
    region_by_state.setdefault(region['state'], region)

# 'w' stands for write mode; the with-block closes the file even if a write fails
with open('OurTempsRegions.csv', 'w') as temps_regions_file:
    csvwriter = csv.DictWriter(temps_regions_file, delimiter=',', fieldnames=attributes)
    csvwriter.writeheader()

    # write to new file
    for city in cities:
        region = region_by_state.get(city['state'])
        if region is None:
            # no region row for this state: the city is left out of the output
            continue
        # merge the region columns into the city row (this modifies the dicts in `cities`)
        city.update(region)
        csvwriter.writerow(city)

In [39]:
# Read the combined file back in: one dict per data row, keyed by the header names
all_temps_regions = []
with open('OurTempsRegions.csv', 'rU') as my_temps_file:
    my_temps_reader = csv.DictReader(my_temps_file)
    # extend() consumes the reader and appends every row in file order
    all_temps_regions.extend(my_temps_reader)
print(all_temps_regions)

[{'city': 'Mobile', 'temp': '44', 'region': 'Southcentral', 'state': 'Alabama', 'coastal': 'Y', 'lat': '31.2', 'lng': '88.5'}, {'city': 'Montgomery', 'temp': '38', 'region': 'Southcentral', 'state': 'Alabama', 'coastal': 'Y', 'lat': '32.9', 'lng': '86.8'}, {'city': 'Phoenix', 'temp': '35', 'region': 'Mountain', 'state': 'Arizona', 'coastal': 'N', 'lat': '33.6', 'lng': '112.5'}, {'city': 'Little Rock', 'temp': '31', 'region': 'Southcentral', 'state': 'Arkansas', 'coastal': 'N', 'lat': '35.4', 'lng': '92.8'}, {'city': 'Los Angeles', 'temp': '47', 'region': 'Pacific', 'state': 'California', 'coastal': 'Y', 'lat': '34.3', 'lng': '118.7'}, {'city': 'San Francisco', 'temp': '42', 'region': 'Pacific', 'state': 'California', 'coastal': 'Y', 'lat': '38.4', 'lng': '123'}, {'city': 'Denver', 'temp': '15', 'region': 'Mountain', 'state': 'Colorado', 'coastal': 'N', 'lat': '40.7', 'lng': '105.3'}, {'city': 'New Haven', 'temp': '22', 'region': 'Northeast', 'state': 'Connecticut', 'coastal': 'Y', 'lat

### Find max temperature

In [45]:
# Manual scan for the highest temperature.
# -999999 is the starting value; any temperature above it replaces it.
max_value = -999999
for city_info in all_temps_regions:
    # 'temp' was read from the CSV as a string, so convert before comparing
    current_value = int(city_info['temp'])
    max_value = current_value if current_value > max_value else max_value
print(max_value)

65


In [46]:
# shorter version: convert each 'temp' to int on the fly and let max() pick the largest
print(max(int(city_info['temp']) for city_info in all_temps_regions))

65


### Find all cities with 'ville' in them

In [41]:
for city_info in all_temps_regions:
    # skip every city whose name does not contain the substring 'ville'
    if 'ville' not in city_info['city']:
        continue
    print('City: {}, State: {}, Temp: {}'.format(
        city_info['city'], city_info['state'], city_info['temp']))

City: Jacksonville, State: Florida, Temp: 45
City: Louisville, State: Kentucky, Temp: 27
City: Nashville, State: Tennessee, Temp: 31


### Find pairs of cities such that lat1 > lat2  and temp1 > temp2

In [42]:
# Visit every unordered pair of cities once (j always starts after i) and print
# the pairs in which the city at the strictly higher latitude is also strictly
# warmer. Both orderings are tested, so it does not matter which city of the
# pair comes first in the list.
for i in range(len(all_temps_regions)):
    # city i's values do not change inside the inner loop, so parse them once
    lat1 = float(all_temps_regions[i]['lat'])
    temp1 = float(all_temps_regions[i]['temp'])
    for j in range(i + 1, len(all_temps_regions)):
        lat2 = float(all_temps_regions[j]['lat'])
        temp2 = float(all_temps_regions[j]['temp'])
        # strict comparisons on both values: two cities at the same latitude
        # (or with the same temperature) are not a match
        if (lat1 > lat2 and temp1 > temp2) or (lat1 < lat2 and temp1 < temp2):
            print('({}, {}) and ({}, {})'.format(
                all_temps_regions[i]['city'], temp1,
                all_temps_regions[j]['city'], temp2))

(Mobile, 44.0) and (Los Angeles, 47.0)
(Montgomery, 38.0) and (Los Angeles, 47.0)
(Montgomery, 38.0) and (San Francisco, 42.0)
(Phoenix, 35.0) and (Los Angeles, 47.0)
(Phoenix, 35.0) and (San Francisco, 42.0)
(Phoenix, 35.0) and (Atlanta, 37.0)
(Little Rock, 31.0) and (San Francisco, 42.0)
(Little Rock, 31.0) and (Albuquerque, 24.0)
(Little Rock, 31.0) and (Charlotte, 34.0)
(Little Rock, 31.0) and (Portland, 33.0)
(Little Rock, 31.0) and (Norfolk, 32.0)
(Little Rock, 31.0) and (Seattle, 33.0)
(Los Angeles, 47.0) and (Jacksonville, 45.0)
(Los Angeles, 47.0) and (Atlanta, 37.0)
(Los Angeles, 47.0) and (New Orleans, 45.0)
(Los Angeles, 47.0) and (Charleston, 38.0)
(Los Angeles, 47.0) and (Houston, 44.0)
(San Francisco, 42.0) and (Atlanta, 37.0)
(San Francisco, 42.0) and (Wichita, 22.0)
(San Francisco, 42.0) and (Albuquerque, 24.0)
(San Francisco, 42.0) and (Charlotte, 34.0)
(San Francisco, 42.0) and (Raleigh, 31.0)
(San Francisco, 42.0) and (Oklahoma City, 28.0)
(San Francisco, 42.0) and 