In [189]:
# Isaac Menninga, 2015
# script to load data for one country
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm, scipy

%matplotlib inline

In [191]:
# load conflict data
def load_conflict_data(country_name):
    '''This function is used to grab data on conflict occurances in a specific country. 
    The function takes one input, which is the name of the country you want to grab data for. Returns the data as a pandas data frame.'''
    
    #path is a string containing the path for the data files
    path = "../data/ACLED/" + country_name + '.xlsx'

    #read from excel file
    data = pd.read_excel(path)

    #only return necessary columns
    data = data[['EVENT_DATE', 'EVENT_TYPE', 'ACTOR1', 'LOCATION', 'FATALITIES']]
    
    data.columns = ['DATETIME', 'event_type', 'actor', 'location', 'deaths']
        
    #sets index to datetime object
    data.index = pd.to_datetime(data.DATETIME)
    
    return(data)

#load climate data
def load_climate_data(country_name):
    '''This function is used to grab data on climate for each country.
    The function takes one input, which is the name of the country to grab data on. The function returns the data as a pandas data frame.'''
    
    #extreme variables is a dictionary containing the file extension corresponding to each variable for extreme weather patterns
    extremes_variables = {'f_extreme_heat' : 'TX90p.hadex.abs', 'f_extreme_cold' : 'TX10p.hadex.abs', 'f_heavy_rain' : 'R95pct.hadex.anom'}

    #mean variables is a dictionary containing the file extension corresponding to each variable for mean temperature and precipitation
    mean_variables = {'temperature' : 'temp.cru.abs', 'precipitation' : 'precip.cru.abs'}

    #contains the file path for each group of variables
    #Extremes/... contains data on extreme weather, Mean/... contains mean temperature and precipitation data
    directories = ['/Observed/Extremes/Timeseries/', '/Observed/Mean/Timeseries/Absolute/']
    
    data = pd.DataFrame()
    # for each group of variables
    for directory in directories:
        # if the data is for extremes, iterate over extremes_variables and gets the final path for each file to grab
        if directory == '/Observed/Extremes/Timeseries/':
            #gets the variable name from a list of variables
            #each variable corresponds to the end of the name of the file
            for variable in extremes_variables:
                #path is equal to the file path for the data file containing data on the current variable
                #concatonates country name, directory names and file names to form the specific file path
                path = '../data/climate/' + country_name + directory + country_name + '.ts.obs.' + extremes_variables[variable] + '.txt'
                
                #if the data frame is empty, set data equal to the contents of the first file
                if data.empty:
                    data = pd.read_table(path, delimiter = '\s+', header = 7)
                    
                #if the data frame is not empty, concatonate the data from the second file to the first
                else:
                    data_2 = pd.read_table(path, delimiter = '\s+', header = 7)
                    data = pd.concat([data, data_2], axis = 1)   

        # if the data is for means, iterate over mean_variables and get the final path for each data file
        elif directory == '/Observed/Mean/Timeseries/Absolute/':
            #gets the variable name from a list of variables
            #each variable corresponds to the end of the name of the file
            for variable in mean_variables:
                #path is equal to the file path for the data file containing data on the current variable
                #concatonates country name, directory names and file names to form the specific file path
                path = '../data/climate/' + country_name + directory + country_name + '.ts.obs.' + mean_variables[variable] + '.txt'
                
                #if the data frame is empty, set data equal to the contents of the first file
                if data.empty:
                    data = pd.read_table(path, delimiter = '\s+', header = 7)
                    
                #if the data frame is not empty, concatonate the data from the second file to the first
                else:
                    data_2 = pd.read_table(path, delimiter = '\s+', header = 7)
                    data = pd.concat([data, data_2], axis = 1)   
    
    #renames columns to clarify which variable corresponds to which set of months
    #note: DJF = Dec., Jan., Feb., MAM = Mar., Apr., May., ... etc.
    data.columns = ['DATETIME', 'Annual_TX90p', 'DJF_TX90p', 'MAM_TX90p', 'JJA_TX90p', 'SON_TX90p', 'YEAR', 'Annual_TX10p', 'DJF_TX10p', 'MAM_TX10p', 'JJA_TX10p', 'SON_TX10p', 'YEAR', 'Annual_R95pct', 'DJF_R95pct', 'MAM_R95pct', 'JJA_R95pct', 'SON_R95pct', 'YEAR', 'Annual_precip', 'JFM_precip', 'AMJ_precip', 'JAS_precip', 'OND_precip', 'YEAR', 'Annual_temp', 'JFM_temp', 'AMJ_temp', 'JAS_temp', 'OND_temp']
    
    #sets index to year for data range from available data
    data.index = pd.to_datetime(data['DATETIME'], format = "%Y", yearfirst = True)
    
    #resamples to set date of datetime to 12-31 instead of 01-01. How = sum has no effect on data because each year has one value
    data = data.resample('A', how = 'sum')
    
    #deletes unnecessary year columns
    del data['YEAR']
    del data['DATETIME']
    
    #return final formatted data frame
    return data

In [233]:
# Load data on Chad to test plotting and statistics
climate_data = load_climate_data('Chad')
print(climate_data.head(6))
conflict_data = load_conflict_data('Chad')
print(conflict_data.head(6))

            Annual_TX90p  DJF_TX90p  MAM_TX90p  JJA_TX90p  SON_TX90p  \
DATETIME                                                               
1960-12-31           NaN        NaN        NaN        NaN        NaN   
1961-12-31           NaN        NaN        NaN        NaN        NaN   
1962-12-31           NaN        NaN        NaN        NaN        NaN   
1963-12-31           NaN        NaN        NaN        NaN        NaN   
1964-12-31           NaN        NaN        NaN        NaN        NaN   
1965-12-31           NaN        NaN        NaN        NaN        NaN   

            Annual_TX10p  DJF_TX10p  MAM_TX10p  JJA_TX10p  SON_TX10p  \
DATETIME                                                               
1960-12-31           NaN        NaN        NaN        NaN       0.45   
1961-12-31           NaN        NaN        NaN        NaN      15.32   
1962-12-31           NaN        NaN        NaN        NaN       3.28   
1963-12-31           NaN        NaN        NaN        NaN      

In [220]:
#join the two data frames
data = pd.concat([climate_data, conflict_data], axis = 1, join_axes=[climate_data.index])

#print(data)

In [234]:
lm = sm.formula.ols(formula = 'Annual_temp ~ deaths', data = data)

ValueError: zero-size array to reduction operation maximum which has no identity

In [223]:
#figure
plt.figure(figsize=(10, 8))

#define subplot 1
plt.subplot(1,2,1)
plt.plot(data.index, data['Annual_R95pct'], color = 'g')

#define subplot 2
plt.subplot(1,2,2)
plt.plot(data.index, data['deaths'], color = 'b')

plt.show()

ValueError: ordinal must be >= 1

<matplotlib.figure.Figure at 0x9dc6898>