In [1]:
import requests
import pprint
import json
import datetime
from os import environ
import time
import pymongo
from pymongo import MongoClient
# Sign up for free API key at https://openweathermap.org/appid

## 3.1 Acquiring data from an API

In [11]:
# Read the OpenWeatherMap API key from the environment instead of hard-coding it.
# Raises KeyError if WEATHER_KEY is not set.
weather_key = environ['WEATHER_KEY']

In [2]:
# Raises KeyError if DATA_FRAME is not set.
# NOTE(review): `dataframe` is not referenced anywhere else in the visible part
# of this notebook — confirm it is still needed.
dataframe = environ['DATA_FRAME']

In [3]:
city = 'london'

In [4]:
# Build the current-weather request URL from the city name and the API key.
# NOTE(review): `city` is interpolated as-is, without URL-encoding.
requestString = u'https://api.openweathermap.org/data/2.5/weather?q=%s&APPID=%s' % (city,weather_key)

In [5]:
res = requests.get(requestString)

In [6]:
res

<Response [200]>

In [7]:
res.reason

'OK'

In [8]:
pprint.pprint(res.json())

{'base': 'stations',
 'clouds': {'all': 0},
 'cod': 200,
 'coord': {'lat': 51.51, 'lon': -0.13},
 'dt': 1532308800,
 'id': 2643743,
 'main': {'humidity': 56,
          'pressure': 1016,
          'temp': 292.14,
          'temp_max': 295.15,
          'temp_min': 290.15},
 'name': 'London',
 'sys': {'country': 'GB',
         'id': 5091,
         'message': 0.0046,
         'sunrise': 1532319068,
         'sunset': 1532376121,
         'type': 1},
 'visibility': 10000,
 'weather': [{'description': 'clear sky',
              'icon': '01n',
              'id': 800,
              'main': 'Clear'}],
 'wind': {'deg': 240, 'speed': 2.6}}


In [9]:
def getData(city, timeout=10):
    """Fetch the current weather for ``city`` from the OpenWeatherMap API.

    Parameters
    ----------
    city : str
        City name sent as the ``q`` query parameter (e.g. ``'london'``).
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    requests.Response
        The raw response. Callers inspect ``status_code`` / ``reason`` and
        decode the body with ``.json()``.

    Raises
    ------
    requests.exceptions.RequestException
        If the connection fails or the timeout expires.
    """
    # Let requests build the query string so that city names containing
    # spaces, '&', '#' or non-ASCII characters are encoded correctly instead
    # of corrupting the URL.
    res = requests.get(
        u'https://api.openweathermap.org/data/2.5/weather',
        params={'q': city, 'APPID': weather_key},
        timeout=timeout,
    )
    return res

In [10]:
res = getData('La Paz')

In [11]:
res

<Response [200]>

## 3.2 Ingesting data into MongoDB

### Create DB and collection

In [12]:
client = pymongo.MongoClient('dsmongo')

In [13]:
# List the databases that currently exist on the server.
# database_names() is deprecated since PyMongo 3.6 and removed in 4.0;
# list_database_names() returns the same list of names.
client.list_database_names()

['admin', 'config', 'local']

In [14]:
db = client.packt

In [15]:
weatherCollection = db.weather

In [16]:
# List the databases again after referencing `packt`.
# database_names() is deprecated since PyMongo 3.6 and removed in 4.0;
# list_database_names() returns the same list of names.
# NOTE(review): 'packt' is absent from the output, presumably because nothing
# has been inserted into it yet — confirm.
client.list_database_names()

['admin', 'config', 'local']

In [17]:
# Store the decoded JSON body of the last API response as one document.
# The InsertOneResult gets its own name so that `res` keeps pointing at the
# HTTP response and this cell can be re-run without failing on `res.json()`.
insert_result = weatherCollection.insert_one(res.json())

### Get list of cities

In [18]:
!wget http://bulk.openweathermap.org/sample/city.list.json.gz

--2018-07-22 15:16:33--  http://bulk.openweathermap.org/sample/city.list.json.gz
Resolving bulk.openweathermap.org (bulk.openweathermap.org)... 78.46.48.103
Connecting to bulk.openweathermap.org (bulk.openweathermap.org)|78.46.48.103|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4292641 (4.1M) [application/octet-stream]
Saving to: ‘city.list.json.gz’


2018-07-22 15:16:36 (1.85 MB/s) - ‘city.list.json.gz’ saved [4292641/4292641]



In [19]:
!gunzip city.list.json.gz

In [18]:
!head city.list.json

[
  {
    "id": 707860,
    "name": "Hurzuf",
    "country": "UA",
    "coord": {
      "lon": 34.283333,
      "lat": 44.549999
    }
  },


In [19]:
# Load the downloaded city list into memory.
# The encoding is pinned to UTF-8 because the file contains non-ASCII city
# names and open() would otherwise fall back to the platform's locale encoding.
with open('city.list.json', 'r', encoding='utf-8') as inFile:
    citiesJson = json.load(inFile)

In [20]:
%ls

city.list.json  david-class-3.ipynb


In [21]:
citiesJson[0]

{'id': 707860,
 'name': 'Hurzuf',
 'country': 'UA',
 'coord': {'lon': 34.283333, 'lat': 44.549999}}

In [22]:
type(citiesJson)

list

### Limit to Chilean cities

#### You don't need to consume all of the memory to create a list up front, as long as the iterator returns the

#### same thing a list would during iteration.

#### https://stackoverflow.com/questions/12319025/filters-in-python3

In [23]:
# Keep only the entries whose country code is Chile ('CL'). The result is a
# fully materialised list (not a lazy filter object), so len() and indexing work.
citiesJsonCL = [entry for entry in citiesJson if entry[u'country'] == u'CL']

In [24]:
len(citiesJsonCL)

369

In [25]:
# Pull the city names and the city ids out of the Chilean entries into two
# parallel lists, which are easier to pass around than the full records.

cities = [entry['name'] for entry in citiesJsonCL]
ids = [entry['id'] for entry in citiesJsonCL]

In [26]:
#Pass the first city to get the data

res = getData(str(cities[0]))

In [27]:
res.reason

'OK'

In [28]:
#this is running the get command in function for 1st city in list
res

<Response [200]>

In [29]:
pprint.pprint(res.json())

{'base': 'stations',
 'clouds': {'all': 0},
 'cod': 200,
 'coord': {'lat': -43.12, 'lon': -73.62},
 'dt': 1532307600,
 'id': 3874943,
 'main': {'humidity': 87,
          'pressure': 1030,
          'temp': 279.15,
          'temp_max': 279.15,
          'temp_min': 279.15},
 'name': 'Puerto Quellon',
 'sys': {'country': 'CL',
         'id': 4661,
         'message': 0.0032,
         'sunrise': 1532348133,
         'sunset': 1532382407,
         'type': 1},
 'visibility': 10000,
 'weather': [{'description': 'clear sky',
              'icon': '01n',
              'id': 800,
              'main': 'Clear'}],
 'wind': {'deg': 350, 'speed': 1.5}}


In [30]:
print(cities[0:10])

['Puerto Quellon', 'Nacimiento', 'El Tabo', 'Región del Libertador General Bernardo O’Higgins', 'Lo Moscoso', 'Barón', 'Región de Los Lagos', 'Las Animas', 'Puerto Montt', 'Las Condes']


# Cycle through the 1st to 10th cities in the list and post each to the collection

In [31]:
# Walk the first ten Chilean cities, fetch the current weather for each one
# and insert every successful response into the MongoDB weather collection.
# `i` (the city id) is paired with each name but is not used in the loop body.
for i, name in zip(ids[0:10], cities[0:10]):

    res = getData(str(name))

    if res.status_code != 200:
        print('Error grabbing data for {}, reason {}'.format(name, res.reason))

    else:
        try:
            weatherCollection.insert_one(res.json())
        except Exception as e:
            # Bind the exception with `as e`: the previous `except e:` raised a
            # NameError as soon as an insert failed. Report the failure and
            # move on so one bad city does not abort the whole run.
            print('Error inserting to DB: {}'.format(e))
            print('City {}'.format(name))

    # Pause between requests so we don't overload the API.
    time.sleep(1)

In [32]:
# Pretty-print every document currently stored in the weather collection.
for doc in weatherCollection.find():
    pprint.pprint(doc)

{'_id': ObjectId('5b55362d0e2c640011d4099e'),
 'base': 'stations',
 'clouds': {'all': 5},
 'cod': 200,
 'coord': {'lat': 24.15, 'lon': -110.32},
 'dt': 1532306400,
 'id': 4000900,
 'main': {'humidity': 23,
          'pressure': 1009,
          'temp': 308.15,
          'temp_max': 308.15,
          'temp_min': 308.15},
 'name': 'La Paz',
 'sys': {'country': 'MX',
         'id': 3990,
         'message': 0.0038,
         'sunrise': 1532349977,
         'sunset': 1532398143,
         'type': 1},
 'visibility': 16093,
 'weather': [{'description': 'clear sky',
              'icon': '02n',
              'id': 800,
              'main': 'Clear'}],
 'wind': {'deg': 330, 'speed': 5.1}}
{'_id': ObjectId('5b55365d0e2c640011d4099f'),
 'base': 'stations',
 'clouds': {'all': 0},
 'cod': 200,
 'coord': {'lat': -43.12, 'lon': -73.62},
 'dt': 1532307600,
 'id': 3874943,
 'main': {'humidity': 87,
          'pressure': 1030,
          'temp': 279.15,
          'temp_max': 279.15,
          'temp_min': 2