In [1]:
import csv
import datetime
import os

import pandas as pd
import pymongo

## Clean Data

In [2]:
# Raw weather export, read from the notebook's working directory.
fname = "07510.01.01.2005.28.11.2016.1.0.0.en.utf8.00000000.csv"

# `pd.DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0;
# `pd.read_csv` is the supported reader. `parse_dates=True` mirrors the old
# from_csv default (it only asks pandas to parse the index as dates) and
# `index_col=None` is passed through exactly as before.
weather = pd.read_csv(fname, sep=";", index_col=None, parse_dates=True)

# Raw CSV header -> column name used by the rest of the notebook.
# Any column that is not a key of this mapping is discarded below.
column_name_map = {
    "Local time in Bordeaux (airport)": "date",
    "T": "temperature",
    "P": "pressure",
    "U": "relative_humidity",
    "Ff": "wind_speed",
    "Tn": "min_temp",
    "Tx": "max_temp",
    "RRR": "precipitation_mm",
    "sss": "snow_depth"
}

# Drop every unmapped column, then rename the survivors. The remaining
# columns keep the order they had in the file.
unmapped = [name for name in weather.columns if name not in column_name_map]
weather = weather.drop(unmapped, axis=1).rename(columns=column_name_map)

# Map the two textual "no measurable rain" labels to 0.0 so the affected
# values can be cast to float. The replacement is applied to the whole frame.
weather = weather.replace(["No precipitation", "Trace of precipitation"], 0.0)

# NOTE(review): the file name carries its date range as dd.mm.yyyy; if the
# `date` column uses the same day-first layout, `pd.to_datetime` needs
# `dayfirst=True` (or an explicit `format=`) to avoid swapping day and month
# for days <= 12 -- confirm against the raw file.
weather.date = pd.to_datetime(weather.date)

# Cast each measurement column from its own values. The original assigned
# `weather.wind_speed` to `precipitation_mm`, silently replacing the rainfall
# data with wind speeds.
for column in (
    "temperature",
    "pressure",
    "relative_humidity",
    "wind_speed",
    "precipitation_mm",
):
    weather[column] = weather[column].astype("float64")

## Aggregate Rainfall by Month

In [None]:
# First day of the month each observation falls in. This is kept in its own
# Series instead of overwriting `weather.date`: a later cell uploads `weather`,
# and truncating the timestamps here would change what gets stored depending
# on whether this cell was run first.
month_start = weather.date.dt.to_period("M").dt.to_timestamp()

# Per-month totals of the numeric columns. The datetime `date` column is
# dropped before summing (it cannot be summed) and comes back from the group
# key via reset_index(), already as datetime64 month starts.
monthly_sum = (
    weather.drop("date", axis=1)
    .groupby(month_start)
    .sum(numeric_only=True)
    .reset_index()
)

# For each calendar year, the mean of that year's monthly precipitation totals.
avg_monthly_rain = monthly_sum.precipitation_mm.groupby(monthly_sum.date.dt.year).mean()

## Put Weather Data into MongoDB

In [4]:
# NOTE(security): the default URL embeds a username and password. Set the
# WEATHER_MONGO_URL environment variable to override it, and prefer removing
# the literal (and rotating the credentials) before sharing this notebook.
url = os.environ.get(
    "WEATHER_MONGO_URL",
    "mongodb://group:group@ds029635.mlab.com:29635/fods-seven",
)
client = pymongo.MongoClient(url)
db = client["fods-seven"]

weather_data = db.weather_data

# Fields written to each document, in this order.
WEATHER_FIELDS = [
    "date",
    "temperature",
    "pressure",
    "relative_humidity",
    "wind_speed",
    "min_temp",
    "max_temp",
    "precipitation_mm",
    "snow_depth",
]
# Number of documents sent per insert_many call.
BATCH_SIZE = 500

# One dict per row. orient="records" does not depend on the frame having a
# 0..n-1 integer index, unlike looking rows up by position in to_dict().
records = weather[WEATHER_FIELDS].to_dict("records")

# Upload in fixed-size slices. Iterating over slices never issues an empty
# insert_many call, which pymongo rejects; the original's unconditional final
# insert failed whenever no rows were left over.
for start in range(0, len(records), BATCH_SIZE):
    weather_data.insert_many(records[start:start + BATCH_SIZE])

<pymongo.results.InsertManyResult at 0x7fdc4ddbbd80>