In [1]:
import pandas as pd
import numpy as np

def temperature(path='temperaturas.csv', year=2006):
    """Load the daily temperature series for one year from a CSV file.

    The CSV must provide the columns ``Anio``, ``Mes``, ``Día`` (used to
    build the date) and ``Datos`` (returned as ``Temperature``).

    Parameters
    ----------
    path : str or path-like, default 'temperaturas.csv'
        CSV file to read.
    year : int, default 2006
        Only rows whose ``Anio`` equals this value are kept.

    Returns
    -------
    pandas.DataFrame
        A single ``Temperature`` column indexed by ``Date``. Rows whose
        year/month/day do not form a valid calendar date, or whose
        temperature value is missing, are dropped.
    """
    raw = pd.read_csv(path)
    # .copy() gives an independent frame, so adding the Date column below does
    # not write into a slice of `raw` (which triggers SettingWithCopyWarning).
    df = raw[raw['Anio'] == year].copy()
    date_string = (
        df['Anio'].astype(str) + '-'
        + df['Mes'].astype(str) + '-'
        + df['Día'].astype(str)
    )
    # errors='coerce' turns impossible dates (e.g. day 31 of a 30-day month)
    # into NaT instead of raising.
    df['Date'] = pd.to_datetime(date_string, format='%Y-%m-%d', errors='coerce')
    # Drop missing values *before* moving Date into the index: dropna() only
    # inspects columns, so NaT dates would otherwise survive as index labels.
    df = df[['Date', 'Datos']].dropna().set_index('Date')
    return df.rename(columns={'Datos': 'Temperature'})

In [2]:
from simpledbf import Dbf5
# Requires simpledbf, plus pytables and sqlalchemy:
# pip install simpledbf tables sqlalchemy

def defun(path='DEFUN06.DBF', year=2006, entity=9):
    """Count the death records per day in a DBF file for one year and entity.

    The table must provide the columns ``ANIO_OCUR``, ``MES_OCURR`` and
    ``DIA_OCURR`` (used to build the date) and ``ENT_OCURR``.

    Parameters
    ----------
    path : str, default 'DEFUN06.DBF'
        DBF file to read with ``simpledbf.Dbf5``.
    year : int, default 2006
        Only records whose ``ANIO_OCUR`` equals this value are counted.
    entity : int, default 9
        Only records whose ``ENT_OCURR`` equals this value are counted.
        (Presumably the code for Mexico City, per the plot title used later
        in this notebook - confirm against the data dictionary.)

    Returns
    -------
    pandas.DataFrame
        A single ``No. of deaths`` column (number of records per date),
        indexed by ``Date`` in ascending order. Records whose
        year/month/day do not form a valid date are not counted.
    """
    records = Dbf5(path).to_dataframe()
    selected = (records['ANIO_OCUR'] == year) & (records['ENT_OCURR'] == entity)
    # .copy() gives an independent frame, so adding the Date column below does
    # not write into a slice of `records` (SettingWithCopyWarning).
    df = records[selected].copy()
    date_string = (
        df['ANIO_OCUR'].astype(str) + '-'
        + df['MES_OCURR'].astype(str) + '-'
        + df['DIA_OCURR'].astype(str)
    )
    # errors='coerce' maps unparseable dates to NaT; groupby then skips those
    # keys, so such records are left out of the counts.
    df['Date'] = pd.to_datetime(date_string, format='%Y-%m-%d', errors='coerce')
    counts = df.groupby('Date').size().rename('No. of deaths')
    return counts.to_frame().sort_index()

In [3]:
# Build both daily series, then keep only the dates present in each of them
# (inner join on the shared Date index).
df1 = temperature()
df2 = defun()
df = df1.merge(df2, how='inner', left_index=True, right_index=True)
df

Unnamed: 0_level_0,Temperature,No. of deaths
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2006-01-09,10.50,196
2006-01-10,11.50,211
2006-01-11,12.75,199
2006-01-12,12.75,209
2006-01-13,14.00,203
...,...,...
2006-12-24,12.50,154
2006-12-25,10.00,202
2006-12-26,9.50,193
2006-12-27,10.00,191


In [10]:
%matplotlib ipympl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import scipy.stats as stats
from numpy.fft import rfft,irfft
from decimal import Decimal

# Shared inputs for display_plot: the common date axis and the two series.
x = df.index
y = [df[column] for column in ('Temperature', 'No. of deaths')]
# Pearson correlation coefficient between the two series and its p-value.
corr, pval = stats.pearsonr(*y)

def _lowpass(data, filt):
    """Rebuild ``data`` from the lowest ``filt`` fraction of its real-FFT coefficients."""
    coeffs = rfft(data)
    total = len(coeffs)
    # Clamp so that any filt <= 0 removes everything and any filt >= 1 keeps
    # everything, without ever indexing outside the coefficient array.
    keep = min(max(int(filt * total), 0), total)
    # Zero every coefficient from `keep` upward in one slice; this cannot
    # leave the last coefficient untouched through float truncation.
    coeffs[keep:] = 0
    # Pass n explicitly: without it irfft assumes an even-length signal and
    # returns one sample too few when len(data) is odd.
    return irfft(coeffs, n=len(data))


def display_plot(filt=0):
    """Plot temperature and deaths over time, raw and low-pass filtered.

    Reads the module-level ``x`` (dates), ``y`` (the two series) and
    ``corr`` / ``pval`` (shown in a text box). Each series is drawn twice on
    its own y-axis: the smoothed curve in full colour and the raw data faded.

    Parameters
    ----------
    filt : float, default 0
        Fraction (0 to 1) of the lowest-frequency Fourier coefficients kept
        for the smoothed curves. 0 removes every coefficient (flat zero
        line); 1 leaves the data unfiltered.

    Returns
    -------
    None
        The figure is created through pyplot and left as the current figure.
    """
    ######## FILTERING AND SMOOTHING
    smoothed = [_lowpass(series, filt) for series in y]

    ######## PLOTTING
    fig, ax1 = plt.subplots()
    col1 = 'teal'
    ax1.plot(x, smoothed[0], col1)
    ax1.plot(x, y[0], col1, alpha=0.23)
    # Raw string: '\c' is not a valid escape sequence in a normal literal.
    ax1.set_ylabel(r'Temperature ($^{\circ} C$)')
    ax1.tick_params(axis='y', labelcolor=col1)

    # Second y-axis sharing the same x-axis as ax1.
    ax2 = ax1.twinx()
    col2 = 'firebrick'
    ax2.plot(x, smoothed[1], col2)
    ax2.plot(x, y[1], col2, alpha=0.23)
    ax2.set_ylabel('No. of deaths')
    ax2.tick_params(axis='y', labelcolor=col2)

    # One major tick per month, labelled with the abbreviated month name
    # (%b gives Jan, Feb, ...). ax2 is the axes pyplot treats as current
    # after twinx(), so it is addressed explicitly here.
    ax2.xaxis.set_major_locator(mdates.MonthLocator())
    ax2.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
    ax2.set_title('Correlation between mean temperature and mortality\nthroughout the year 2006 in Mexico City')

    # Decimal is kept for the p-value so its exponent is formatted as before.
    txt = 'corr = {}\np-val = {:.1E}'.format(round(corr, 2), Decimal(pval))
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    # Positioned in ax1's axes coordinates (top-left corner).
    ax2.text(0.05, 0.95, txt, fontsize=9, transform=ax1.transAxes,
             verticalalignment='top', bbox=props)

# Draw the figure; the smoothed curves keep the lowest 25% of the Fourier coefficients.
display_plot(0.25)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …