# Deparment of Agronomy Iowa State University
https://mesonet.agron.iastate.edu/request/download.phtml?network=BR__ASOS

In [12]:
"""
Script that scrapes data from the IEM ASOS download service
"""
import json
import datetime
import urllib2

# timestamps in UTC to request data for
startts = datetime.datetime(2015, 1, 1)
endts = datetime.datetime(2016, 9, 1)

SERVICE = "http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"
SERVICE += "data=all&tz=Etc/UTC&format=comma&latlon=yes&"

SERVICE += startts.strftime('year1=%Y&month1=%m&day1=%d&')
SERVICE += endts.strftime('year2=%Y&month2=%m&day2=%d&')

# states = """AK AL AR AZ CA CO CT DE FL GA HI IA ID IL IN KS KY LA MA MD ME
#  MI MN MO MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT
#  WA WI WV WY"""

# IEM quirk to have Iowa AWOS sites in its own labeled network
networks = ['BR__ASOS']  ### Brazil Code

for state in states.split():
    networks.append("%s_ASOS" % (state,))

for network in networks:
    # Get metadata
    uri = "http://mesonet.agron.iastate.edu/geojson/network.php?network=%s" % (
                                                                    network,)
    data = urllib2.urlopen(uri)
    jdict = json.load(data)
    for site in jdict['features']:
        faaid = site['properties']['sid']
        sitename = site['properties']['sname']
        uri = '%s&station=%s' % (SERVICE, faaid)
        print 'Network: %s Downloading: %s [%s]' % (network, sitename, faaid)
        data = urllib2.urlopen(uri)
        outfn = '%s_%s_%s.txt' % (faaid, startts.strftime("%Y%m%d%H%M"),
                                  endts.strftime("%Y%m%d%H%M"))
        out = open(outfn, 'w')
        out.write(data.read())
        out.close()

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_formats = ['svg']
sns.set_style("white")

In [3]:
## Reading all the files at once
import os  ## Need packages to import various files
import glob ## Need packages to import various files

path = "/Users/Javier/Desktop/DSI-SF-1-MAJACACI00/projects/capstone/Zika_data/brazil_weather_data/"
allFiles = glob.glob(os.path.join(path,"*.txt"))

np_array_list = []
for file_ in allFiles:
    df = pd.read_csv(file_,skiprows=5)
    np_array_list.append(df.as_matrix())

comb_np_array = np.vstack(np_array_list)
brazil_weather_master = pd.DataFrame(comb_np_array, columns = ['station', 'valid', 'lon', 'lat', 'tmpf', 'dwpf', 'relh', 'drct',
                                                      'sknt', 'p01i', 'alti', 'mslp', 'vsby', 'gust', 'skyc1', 'skyc2', 
                                                      'skyc3', 'skyc4', 'skyl1', 'skyl2', 'skyl3', 'skyl4','presentwx',
                                                      'metar'])


In [4]:
print brazil_weather_master.shape
print "++++++++++++++"
print brazil_weather_master.info()
print "+++++++++++++"
print brazil_weather_master.isnull().sum()
print "+++++++++++++"
print brazil_weather_master.valid.min()
print brazil_weather_master.valid.max()
print "+++++++++++++"
brazil_weather_master.head(2)

(1128698, 24)
++++++++++++++
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1128698 entries, 0 to 1128697
Data columns (total 24 columns):
station      1128698 non-null object
valid        1128698 non-null object
lon          1128698 non-null object
lat          1128698 non-null object
tmpf         1128698 non-null object
dwpf         1128698 non-null object
relh         1128698 non-null object
drct         1128698 non-null object
sknt         1128698 non-null object
p01i         1128698 non-null object
alti         1128698 non-null object
mslp         1128698 non-null object
vsby         1128698 non-null object
gust         1128698 non-null object
skyc1        1128698 non-null object
skyc2        1128698 non-null object
skyc3        1128698 non-null object
skyc4        1128698 non-null object
skyl1        1128698 non-null object
skyl2        1128698 non-null object
skyl3        1128698 non-null object
skyl4        1128698 non-null object
presentwx    1128698 non-null object
metar  

Unnamed: 0,station,valid,lon,lat,tmpf,dwpf,relh,drct,sknt,p01i,...,skyc1,skyc2,skyc3,skyc4,skyl1,skyl2,skyl3,skyl4,presentwx,metar
0,SBAA,2015-01-02 11:00,-49.2833,-8.25,84.2,77.0,79.04,190.0,3.0,0,...,M,M,M,M,M,M,M,M,M,SBAA 021100Z 19003KT CAVOK 29/25 Q1014
1,SBAA,2015-01-02 12:00,-49.2833,-8.25,87.8,75.2,66.35,0.0,0.0,0,...,M,M,M,M,M,M,M,M,M,SBAA 021200Z 00000KT CAVOK 31/24 Q1014


In [5]:
## Variables needed for the mosquito outbreak
# station,valid, lon, lat, 
# tmpf: Air Temperature in Fahrenheit, typically @ 2 meters
# dwpf: Dew Point Temperature in Fahrenheit, typically @ 2 meters
# dew point is a true measurement of the atmospheric moisture
# relh: Relative Humidity in %
# drct: Wind Direction in degrees from north
# sknt: sknt: Wind Speed in knots
# p01i: One hour precipitation for the period from the observation time to the time of the previous hourly precipitation reset. 
# alti: Pressure altimeter in inches

brazil_weather = brazil_weather_master[['station','valid','lon','lat','tmpf','dwpf', 'relh','drct','sknt','p01i', 'alti']]
import cPickle

f = open('brazil_weather.save', 'wb')
cPickle.dump(brazil_weather, f)
f.close()


## No Missing values on lon, lat, p01i

In [6]:
print brazil_weather.shape
print "\n=================\n"
print brazil_weather.info()

(1128698, 11)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1128698 entries, 0 to 1128697
Data columns (total 11 columns):
station    1128698 non-null object
valid      1128698 non-null object
lon        1128698 non-null object
lat        1128698 non-null object
tmpf       1128698 non-null object
dwpf       1128698 non-null object
relh       1128698 non-null object
drct       1128698 non-null object
sknt       1128698 non-null object
p01i       1128698 non-null object
alti       1128698 non-null object
dtypes: object(11)
memory usage: 94.7+ MB
None


In [75]:
## Missing values on tmpf: Air Temperature in Fahrenheit, typically @ 2 meters
brazil_weather[brazil_weather.tmpf=="M"].head(3)

Unnamed: 0,station,valid,lon,lat,tmpf,dwpf,relh,drct,sknt,p01i,alti
1693,SBAA,2015-09-02 13:00,-49.2833,-8.25,M,M,M,M,M,0,29.97
1694,SBAA,2015-09-02 14:00,-49.2833,-8.25,M,M,M,M,M,0,29.94
1695,SBAA,2015-09-02 15:00,-49.2833,-8.25,M,M,M,M,M,0,29.91


In [11]:
len (brazil_weather.station.unique().tolist())

126

In [76]:
# relh: Relative Humidity in %
brazil_weather[brazil_weather.relh=="M"].head(3)

Unnamed: 0,station,valid,lon,lat,tmpf,dwpf,relh,drct,sknt,p01i,alti
1693,SBAA,2015-09-02 13:00,-49.2833,-8.25,M,M,M,M,M,0,29.97
1694,SBAA,2015-09-02 14:00,-49.2833,-8.25,M,M,M,M,M,0,29.94
1695,SBAA,2015-09-02 15:00,-49.2833,-8.25,M,M,M,M,M,0,29.91


In [77]:
# drct: Wind Direction in degrees from north
brazil_weather[brazil_weather.drct=="M"].head(3)

Unnamed: 0,station,valid,lon,lat,tmpf,dwpf,relh,drct,sknt,p01i,alti
1693,SBAA,2015-09-02 13:00,-49.2833,-8.25,M,M,M,M,M,0,29.97
1694,SBAA,2015-09-02 14:00,-49.2833,-8.25,M,M,M,M,M,0,29.94
1695,SBAA,2015-09-02 15:00,-49.2833,-8.25,M,M,M,M,M,0,29.91


In [78]:
# dwpf: Dew Point Temperature in Fahrenheit, typically @ 2 meters
brazil_weather[brazil_weather.dwpf=="M"].head(3)

Unnamed: 0,station,valid,lon,lat,tmpf,dwpf,relh,drct,sknt,p01i,alti
1693,SBAA,2015-09-02 13:00,-49.2833,-8.25,M,M,M,M,M,0,29.97
1694,SBAA,2015-09-02 14:00,-49.2833,-8.25,M,M,M,M,M,0,29.94
1695,SBAA,2015-09-02 15:00,-49.2833,-8.25,M,M,M,M,M,0,29.91


In [79]:
# sknt: Wind Speed in knots
brazil_weather[brazil_weather.sknt=="M"].head(3)

Unnamed: 0,station,valid,lon,lat,tmpf,dwpf,relh,drct,sknt,p01i,alti
1693,SBAA,2015-09-02 13:00,-49.2833,-8.25,M,M,M,M,M,0,29.97
1694,SBAA,2015-09-02 14:00,-49.2833,-8.25,M,M,M,M,M,0,29.94
1695,SBAA,2015-09-02 15:00,-49.2833,-8.25,M,M,M,M,M,0,29.91


In [80]:
# p01i: One hour precipitation for the period from the observation time to the time of the 
# previous hourly precipitation reset. 
brazil_weather[brazil_weather.p01i=="M"].head(3)

Unnamed: 0,station,valid,lon,lat,tmpf,dwpf,relh,drct,sknt,p01i,alti


In [81]:
# alti: Pressure altimeter in inches
brazil_weather[brazil_weather.alti=="M"].head(3)

Unnamed: 0,station,valid,lon,lat,tmpf,dwpf,relh,drct,sknt,p01i,alti
4214,SBAF,2015-01-29 11:00,-43.3847,-22.8751,M,M,M,320.0,2.0,0,M
4215,SBAF,2015-01-29 12:00,-43.3847,-22.8751,M,M,M,300.0,3.0,0,M
4216,SBAF,2015-01-29 13:00,-43.3847,-22.8751,M,M,M,50.0,5.0,0,M


In [83]:
## I am not going to replace the missing values yet, want to replace them for the mean once I subset the datasets.

In [84]:
#brazil_weather.mslp = brazil_weather.mslp.map(lambda x: x.replace("M", '0'))
brazil_weather.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1128698 entries, 0 to 1128697
Data columns (total 11 columns):
station    1128698 non-null object
valid      1128698 non-null object
lon        1128698 non-null object
lat        1128698 non-null object
tmpf       1128698 non-null object
dwpf       1128698 non-null object
relh       1128698 non-null object
drct       1128698 non-null object
sknt       1128698 non-null object
p01i       1128698 non-null object
alti       1128698 non-null object
dtypes: object(11)
memory usage: 94.7+ MB


In [132]:
# brazil_weather.station=="SBAM"

### Acre

In [94]:
acre_weather = brazil_weather[brazil_weather['station'].isin(['SBCZ','SBTK','SBRB'])]
acre_weather.station.unique()


array(['SBCZ', 'SBRB', 'SBTK'], dtype=object)

### Sao Paulo

In [96]:
sao_paulo_weather = brazil_weather[brazil_weather['station'].isin(['SBDN', 'SBAS', 'SBUP', 'SBFT', 'SBBT', 'SBPC', 
                                                                   'SBRS', 'SBAU', 'SDVG', 'SBML', 'SBLN', 'SBSR', 
                                                                   'SBGP', 'SBBU', 'SBRP', 'SBAQ', 'SBSA', 'SBYS', 
                                                                   'SBKP', 'SBJD', 'SBRQ', 'SBSP', 'SBMT', 'SBGR', 
                                                                   'SBST', 'SBSJ', 'SBTA', 'SBGW'])]
sao_paulo_weather.station.unique()

array(['SBAQ', 'SBAU', 'SBBU', 'SBDN', 'SBGR', 'SBGW', 'SBJD', 'SBKP',
       'SBML', 'SBMT', 'SBPC', 'SBRP', 'SBSJ', 'SBSP', 'SBSR', 'SBST',
       'SBTA', 'SBYS'], dtype=object)

### Amazonas

In [99]:
amazonas_weather = brazil_weather[brazil_weather['station'].isin(['SBBA', 'SBER', 'SBBC', 'SBTT', 'SBYA', 'SBUA', 
                                                                  'SWBC', 'SWPI', 'SBIC', 'SBEG', 'SBMY', 'SBUY', 
                                                                  'SBUI', 'SBTF', 'SBMN','SBEG'])]
amazonas_weather.station.unique()

array(['SBEG', 'SBIC', 'SBMN', 'SBMY', 'SBTF', 'SBTT', 'SBUA', 'SBUY',
       'SBYA'], dtype=object)

### Espirito Santo

In [100]:
esp_santo_weather = brazil_weather[brazil_weather['station'].isin (['SBVT'])]
esp_santo_weather.station.unique()

array(['SBVT'], dtype=object)

### Amapa

In [102]:
amapa_weather = brazil_weather[brazil_weather['station'].isin(['SBMQ','SBAM','SBOI'])]
amapa_weather.station.unique()

array(['SBMQ', 'SBOI'], dtype=object)

### Sergipe 

In [105]:
sergipe_weather = brazil_weather[brazil_weather['station'].isin(['SBAR'])]
sergipe_weather.station.unique()

array(['SBAR'], dtype=object)

### Mato Grosso

In [106]:
mato_groso = brazil_weather[brazil_weather['station'].isin(['SBCY','SBVH','SBAT','SBXI','SBSY','SBBW','SBXV','SBXG'])]
mato_groso.station.unique()

array(['SBAT', 'SBBW', 'SBCY', 'SBVH'], dtype=object)

### Piaui

In [107]:
piaui_weather = brazil_weather[brazil_weather['station'].isin(['SBTE', 'SBPB'])]
piaui_weather.station.unique()

array(['SBPB', 'SBTE'], dtype=object)

### Santa Catarina

In [108]:
st_cat_weather = brazil_weather[brazil_weather['station'].isin(['SBCH','SBCD','SBLJ', 'SBCM', 'SBTR', 'SBFL',
                                                                'SBNF','SBJV'])]
st_cat_weather.station.unique()

array(['SBCH', 'SBCM', 'SBFL', 'SBJV', 'SBNF'], dtype=object)

### Parana

In [109]:
parana_weather = brazil_weather[brazil_weather['station'].isin(['SBFI', 'SBCA', 'SBTD', 'SBMG', 'SBLO', 'SBTL', 'SBBI', 
                                                                'SBCT', 'SBPG', 'SBGU'])]
parana_weather.station.unique()


array(['SBBI', 'SBCA', 'SBCT', 'SBFI', 'SBGU', 'SBLO', 'SBMG'], dtype=object)

### Minas Gerais

In [110]:
minas_gerais_weather = brazil_weather[brazil_weather['station'].isin(['SBJF', 'SBPC', 'SBFT', 'SBIT', 'SBUL', 'SBUR', 
                                                                      'SBAX', 'SBVG', 'SBFU', 'SBAX', 'SBJF', 'SBBQ',
                                                                      'SBCF', 'SBPR', 'SBBH', 'SBLS', 'SBIP', 'SBGV', 
                                                                      'SBMK'])]
minas_gerais_weather.station.unique()

array(['SBAX', 'SBBH', 'SBBQ', 'SBCF', 'SBIP', 'SBJF', 'SBLS', 'SBMK',
       'SBPC', 'SBPR', 'SBUL', 'SBUR'], dtype=object)

### Bahia Checked


In [111]:
bahia_weather = brazil_weather[brazil_weather['station'].isin(['SBCV', 'SBPS', 'SBTC', 'SBIL', 'SBSV', 'SBUF', 'SBPL',
                                                               'SBLE', 'SBLP', 'SBQV'])]
bahia_weather.station.unique()


array(['SBIL', 'SBLE', 'SBLP', 'SBPL', 'SBPS', 'SBQV', 'SBSV', 'SBTC',
       'SBUF'], dtype=object)

### Pernambuco Checked

In [114]:
pernan_weather = brazil_weather[brazil_weather['station'].isin(['SBRF'])]
pernan_weather.station.unique()


array(['SBRF'], dtype=object)

### Roraima

In [115]:
roraima_weather = brazil_weather[brazil_weather['station'].isin(['SBBV'])]
roraima_weather.station.unique()

array(['SBBV'], dtype=object)

### Ceara

In [116]:
ceara_weather = brazil_weather[brazil_weather['station'].isin(['SBPZ', 'SBJU'])]
ceara_weather.station.unique()

array(['SBJU'], dtype=object)

### Alagoas

In [117]:
alagoas_weather = brazil_weather[brazil_weather['station'].isin(['SBMO', 'SBUF'])]
alagoas_weather.station.unique()

array(['SBMO', 'SBUF'], dtype=object)

### Maranhao

In [118]:
maranhao_weather = brazil_weather[brazil_weather['station'].isin(['SBCI', 'SBIZ', 'SBSL', 'SBPB', 'SBTE'])]
maranhao_weather.station.unique()


array(['SBCI', 'SBIZ', 'SBPB', 'SBSL', 'SBTE'], dtype=object)

### Mato Grosso do Sul

In [119]:
mato_grosso_sul = brazil_weather[brazil_weather['station'].isin(['SBPP', 'SBCR', 'SBCG', 'SBUP'])]
mato_grosso_sul.station.unique()


array(['SBCG', 'SBCR', 'SBPP'], dtype=object)

### Rio Grande do Norte

In [120]:
rio_grande_norte = brazil_weather[brazil_weather['station'].isin(['SBMS', 'SBSG', 'SBNT'])]
rio_grande_norte.station.unique()


array(['SBMS', 'SBNT', 'SBSG'], dtype=object)

### Rio de Janeiro

In [122]:
rio_jan = brazil_weather[brazil_weather['station'].isin(['SBRS', 'SBSC', 'SBJR', 'SBAF', 'SBRJ', 'SBGL', 'SBCB', 'SBES'
                                                        'SBBZ', 'SBME', 'SBFS', 'SBCP'])]
rio_jan.station.unique()



array(['SBAF', 'SBCB', 'SBCP', 'SBFS', 'SBGL', 'SBJR', 'SBME', 'SBRJ',
       'SBSC'], dtype=object)

### Rio Grande do Sul

In [124]:
rio_grande_sul = brazil_weather[brazil_weather['station'].isin(['SBRG', 'SBPK', 'SBBG', 'SBUG', 'SBNM', 'SBSM', 'SBPF', 'SBCX', 
                                                                'SBTR', 'SBPA', 'SBCO'])]
rio_grande_sul.station.unique()


array(['SBBG', 'SBCO', 'SBCX', 'SBPA', 'SBPF', 'SBPK', 'SBSM', 'SBUG'], dtype=object)

### Rondonia

In [126]:
rondonia_weather = brazil_weather[brazil_weather['station'].isin(['SBVH', 'SBGM', 'SBPV'])]
rondonia_weather.station.unique()


array(['SBGM', 'SBPV', 'SBVH'], dtype=object)

### Paraiba

In [127]:
paraiba_weather = brazil_weather[brazil_weather['station'].isin(['SBKG', 'SBJP'])]
paraiba_weather.station.unique()

array(['SBJP', 'SBKG'], dtype=object)

### Para

In [128]:
para_weather = brazil_weather[brazil_weather['station'].isin(['SBXI', 'SBEK', 'SBIH', 'SBSN', 'SBTB', 'SBTS', 'SBMD',
                                                              'SBMZ', 'SBBE', 'SBTU', 'SBMA', 'SBCJ', 'SBHT', 'SBMZ', 
                                                              'SBTU'])]
paraiba_weather.station.unique()


array(['SBJP', 'SBKG'], dtype=object)

### Tocantins 

In [129]:
tocatins_weather = brazil_weather[brazil_weather['station'].isin(['SBSY', 'SBAA', 'SBIZ', 'SBCI', 'SBPJ', 'SBPN'])]
tocatins_weather.station.unique()

array(['SBAA', 'SBCI', 'SBIZ', 'SBPJ', 'SBPN'], dtype=object)

### Distrito Federal

In [130]:
## Consider Combining Distrito Federal with Goias
dist_fed_weather = brazil_weather[brazil_weather['station'].isin(['SBBR', 'SBGA'])]
dist_fed_weather.station.unique()

array(['SBBR'], dtype=object)

### Goias

In [131]:
goias_weather = brazil_weather[brazil_weather['station'].isin(['SBIT', 'SBCN', 'SBGO', 'SBAN', 'SBGA', 'SBBR', 'SBMC'])]
goias_weather.station.unique()


array(['SBAN', 'SBBR', 'SBGO'], dtype=object)

In [47]:
# var_list = brazil_weather.columns.tolist()
var_list = ['lon','lat','tmpf','dwpf', 'relh','drct','sknt','p01i', 'alti','mslp']
brazil_weather.tmpf = brazil_weather.tmpf.apply(pd.to_numeric, errors='coerce')

# brazil_weather.tmpf = brazil_weather.tmpf.map(int_replace)
# brazil_weather.tmpf = brazil_weather.tmpf.astype(int) 

# brazil_frame.tmpf = brazil_frame.tmpf.map(int_replace)
# brazil_frame.tmpf = brazil_frame.tmpf.astype(int)
# print brazil_frame.value.dtype


In [None]:
pred_data = pred_data.apply(pd.to_numeric, errors='coerce')

### Acre
Stations
SBTK TARAUCA
SBCZ CUZEIRO DO SUL
SBRB RIO BRANCO/MEDIC

In [None]:

path = '../brazil_weather_data/Acre/asos.txt'
acre_weather = pd.read_csv(path, skiprows=5)
print "Shape of Data", acre_weather.shape
print "++++++++++++++\n"
print "Weather Stations Code Name:"
print acre_weather.station.value_counts()
print "++++++++++++++\n"
acre_weather.head(1)

In [10]:
df.station.value_counts()

SBAA    1020
Name: station, dtype: int64