In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from meteostat import Point, Daily, Monthly
import math
import requests
from scipy import stats
from itertools import compress
from tkinter import *

In [4]:
# Capital-city coordinates per country, loaded from a local CSV.
# The rename targets a column spelled 'ountry' (presumably a truncated
# 'Country' header in the file -- TODO confirm against the CSV).
cap_latlong = pd.read_csv("capital_latlong.csv")
cap_latlong = cap_latlong.rename(columns = {'ountry' : 'Country'})

In [5]:
# Load 'NYSE.csv' into a DataFrame (presumably the NYSE ticker listing -- TODO confirm).
# NOTE(review): NYSE_stocks is not referenced by any other cell visible here.
NYSE_stocks = pd.read_csv('NYSE.csv')

**STOCK API DATA ONLY GOES BACK TO 1999**

For weather:$\newline$
'tavg': avg temp $\newline$
'tmin': min temp $\newline$
'tmax': max temp$\newline$
'prcp': precipitation$\newline$
'wspd': avg windspeed$\newline$
'pres': avg sea-level air pressure$\newline$
'tsun': monthly total sunshine

In [6]:
# Weather columns that find_weather aggregates to a yearly value with a mean.
avg_weathers = ['tavg', 'tmin', 'tmax', 'wspd', 'pres']
# Weather columns that find_weather aggregates to a yearly value with a sum.
sum_weathers = ['prcp', 'tsun']

In [42]:
# Show the capital-coordinates row(s) whose Country is 'United States'.
cap_latlong.loc[cap_latlong['Country'] == 'United States']

Unnamed: 0,Country,Capital,Latitude,Longitude
240,United States,Washington,38.907192,-77.036871


`find_weather` returns the specified weather metric for a country's capital, or for an explicit latitude/longitude, by month or by year.

In [44]:
# Sample query: monthly Meteostat data for 2000-2010 at one fixed coordinate.
start, end = datetime(2000, 1, 1), datetime(2010, 12, 31)
location = Point(34.05611, -118.42972)
# Build the monthly query and download it in one step.
weather_data = Monthly(location, start, end).fetch()
weather_data

Unnamed: 0_level_0,tavg,tmin,tmax,prcp,wspd,pres,tsun
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-01-01,14.2,10.2,19.3,21.0,2.5,1020.2,
2000-02-01,13.8,10.1,18.2,104.0,4.2,1019.2,
2000-03-01,14.6,10.6,19.8,61.0,4.8,1016.2,
2000-04-01,16.1,12.9,19.4,48.0,13.8,1016.4,
2000-05-01,17.9,14.6,21.1,0.0,13.2,1013.5,
...,...,...,...,...,...,...,...
2010-08-01,18.8,15.7,21.9,0.0,11.9,1011.8,
2010-09-01,19.4,15.8,22.9,0.0,10.7,1011.4,
2010-10-01,18.7,15.7,21.6,40.0,10.3,1016.1,
2010-11-01,16.3,11.4,21.3,40.0,10.3,1018.0,


In [7]:
def find_weather(start_year, end_year, weather, country = None, latitude = None, longitude = None, time_interval = 'year'):
    """Fetch one Meteostat weather metric for a location, by month or by year.

    The location is either the capital of `country` (coordinates looked up in
    the module-level `cap_latlong` table) or an explicit latitude/longitude
    pair -- never both.

    Parameters
    ----------
    start_year, end_year : int
        Data is requested from 1 January `start_year` to 31 December `end_year`.
    weather : str
        A column listed in `avg_weathers` (yearly value = mean of the months)
        or in `sum_weathers` (yearly value = sum of the months).
    country : str, optional
        A value of the 'Country' column of `cap_latlong`.
    latitude, longitude : float, optional
        Explicit coordinates; 0 is a valid value for either.
    time_interval : {'year', 'month'}
        Granularity of the returned series.

    Returns
    -------
    list of tuple
        `(year, value)` pairs for 'year', `('YYYY-MM', value)` pairs for 'month'.

    Raises
    ------
    ValueError
        For an unknown time interval or weather type, an ambiguous or
        incomplete location, or a country missing from `cap_latlong`.
    """
    # Validate the cheap arguments first so bad input fails before any download.
    if time_interval not in ('year', 'month'):
        raise ValueError('Invalid time interval')
    if weather not in avg_weathers and weather not in sum_weathers:
        raise ValueError('Invalid Weather Type')

    # Coordinates are compared against None (not truthiness) so that points on
    # the equator or the prime meridian (latitude/longitude == 0) are accepted.
    has_country = bool(country)
    has_latitude = latitude is not None
    has_longitude = longitude is not None

    if has_country and not (has_latitude or has_longitude):
        capital = cap_latlong.loc[cap_latlong['Country'] == country, ['Latitude', 'Longitude']]
        if capital.empty:
            raise ValueError('Invalid location. Unknown country: ' + str(country))
        # .iloc[0] extracts a scalar explicitly instead of calling float() on a Series.
        location = Point(float(capital['Latitude'].iloc[0]), float(capital['Longitude'].iloc[0]))
    elif has_latitude and has_longitude and not has_country:
        location = Point(latitude, longitude)
    else:
        raise ValueError('Invalid location. Input a country OR both latitude and longitude')

    start = datetime(start_year, 1, 1)
    end = datetime(end_year, 12, 31)
    weather_data = Monthly(location, start, end).fetch()
    values = weather_data[weather]

    if time_interval == 'month':
        return list(zip([stamp.strftime('%Y-%m') for stamp in weather_data.index], list(values)))

    # Yearly series: collapse each calendar year's months with the metric's aggregator.
    aggregate = np.mean if weather in avg_weathers else sum
    years = weather_data.index.year
    return [(year, aggregate(values[years == year])) for year in np.unique(years)]
    

In [8]:
class Weather():
    """Monthly and yearly series of every weather metric for one location.

    For each metric the constructor stores two attributes filled by
    `find_weather`: `m_<metric>` (time_interval 'month') and `y_<metric>`
    (time_interval 'year'). The location is passed through as given
    (`country`, or `latitude` and `longitude`).

    The `time_interval` constructor argument is accepted but not used; both
    granularities are always loaded.
    """

    # Metric names, in the order they are loaded and reported.
    _METRICS = ('tavg', 'tmin', 'tmax', 'prcp', 'wspd', 'pres', 'tsun')

    def __init__(self, start_year, end_year, country = None, latitude = None, longitude = None, time_interval = 'year'):
        where = dict(country = country, latitude = latitude, longitude = longitude)
        # All monthly series first, then all yearly series.
        for prefix, interval in (('m_', 'month'), ('y_', 'year')):
            for metric in self._METRICS:
                series = find_weather(start_year, end_year, metric, time_interval = interval, **where)
                setattr(self, prefix + metric, series)

    def _values(self, prefix):
        """Map each metric to the value column (second tuple item) of its stored series."""
        return {
            metric: [pair[1] for pair in getattr(self, prefix + metric)]
            for metric in self._METRICS
        }

    def get_m(self):
        """Return {metric: list of monthly values}."""
        return self._values('m_')

    def get_y(self):
        """Return {metric: list of yearly values}."""
        return self._values('y_')

`find_stock_price` returns the adjusted closing price of the specified stock, by month or by year.

In [9]:
def find_stock_price(stock, start_year, end_year,  time_interval = 'year'):
    """Download a stock's adjusted closing prices from Alpha Vantage.

    Parameters
    ----------
    stock : str
        Ticker symbol, inserted into the request URL.
    start_year, end_year : int
        Inclusive range of calendar years to keep.
    time_interval : {'year', 'month'}
        'month' keeps every monthly entry in range; 'year' keeps only the
        December entry of each year in range.

    Returns
    -------
    list of tuple
        `('YYYY-MM', price)` pairs for 'month', `(year, price)` pairs for
        'year', always in chronological order (oldest first).

    Raises
    ------
    ValueError
        If `time_interval` is neither 'month' nor 'year'.
    KeyError
        If the response holds no 'Monthly Adjusted Time Series' entry (for
        example an error or rate-limit payload); the message includes the payload.
    """
    import os  # local import: only needed for the API-key lookup below

    # Reject a bad interval before spending an API call on it.
    if time_interval not in ('month', 'year'):
        raise ValueError('Invalid Time Interval')

    # NOTE(security): an API key should not live in the notebook. Set the
    # ALPHAVANTAGE_API_KEY environment variable to override it; the literal is
    # kept only as a fallback so existing runs keep working.
    api_key = os.environ.get('ALPHAVANTAGE_API_KEY', 'WR65UNYM9P18M23Q')
    url = 'https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY_ADJUSTED&symbol=' + stock + '&apikey=' + api_key
    payload = dict(requests.get(url).json())

    monthly = payload.get('Monthly Adjusted Time Series')
    if monthly is None:
        raise KeyError('No "Monthly Adjusted Time Series" in Alpha Vantage response: ' + str(payload))

    out = []
    # Keys are ISO dates ('YYYY-MM-DD'), so sorting them is chronological.
    # This keeps the result aligned with the oldest-first weather series no
    # matter what order the API lists the months in
    # (NOTE(review): Alpha Vantage appears to list newest first -- confirm).
    for date_key in sorted(monthly):
        year = int(date_key[0:4])
        if not (start_year <= year <= end_year):
            continue
        price = float(monthly[date_key]['5. adjusted close'])
        if time_interval == 'month':
            out.append((date_key[0:7], price))
        elif date_key[5:7] == '12':
            out.append((year, price))

    return out

In [10]:
class Stock():
    """Monthly and yearly price series of one stock, loaded via `find_stock_price`.

    `monthly_price` and `yearly_price` hold the (date, price) pairs returned
    for the 'month' and 'year' intervals respectively.
    """

    def __init__(self, stock, start_year, end_year):
        # Monthly series is requested first, then the yearly one.
        self.monthly_price, self.yearly_price = (
            find_stock_price(stock, start_year, end_year, interval)
            for interval in ('month', 'year')
        )

    @staticmethod
    def _prices(pairs):
        """Return the price (second item) of every (date, price) pair."""
        return [pair[1] for pair in pairs]

    def get_monthly_price(self):
        """Return the monthly prices without their dates."""
        return self._prices(self.monthly_price)

    def get_yearly_price(self):
        """Return the yearly prices without their dates."""
        return self._prices(self.yearly_price)


Returns the correlation between a stock's price and each weather type in the specified country's capital

In [11]:
def find_cor(country, stock, weather, start_year, end_year, time_interval = 'year'):
    """Correlate a stock's price with every weather metric of a country's capital.

    Parameters
    ----------
    country : str
        Country whose capital's weather is loaded (via `Weather`).
    stock : str
        Ticker symbol (loaded via `Stock`).
    weather : str
        Accepted for signature compatibility but not used: all seven metrics
        are always reported.
    start_year, end_year : int
        Year range passed to both `Stock` and `Weather`.
    time_interval : {'year', 'month'}
        Whether to correlate the yearly or the monthly series.

    Returns
    -------
    list of tuple
        `(metric, r)` pairs, where `r` is the correlation coefficient from
        `scipy.stats.linregress(prices, weather_values)`.

    Raises
    ------
    ValueError
        If `time_interval` is neither 'month' nor 'year'.
    """
    if time_interval not in ('month', 'year'):
        raise ValueError('Invalid time interval')

    stock_obj = Stock(stock, start_year, end_year)
    # The weather container in this notebook is `Weather`; its location
    # argument comes after the year range.
    weather_obj = Weather(start_year, end_year, country = country)

    # Build the price list and the weather dict once instead of once per metric.
    if time_interval == 'month':
        prices = stock_obj.get_monthly_price()
        weather_values = weather_obj.get_m()
    else:
        prices = stock_obj.get_yearly_price()
        weather_values = weather_obj.get_y()

    metrics = ['tavg', 'tmin', 'tmax', 'prcp', 'wspd', 'pres', 'tsun']
    # Index 2 of the linregress result is the correlation coefficient.
    return [(metric, stats.linregress(prices, weather_values[metric])[2]) for metric in metrics]
    

In [12]:
def find_cor_country_list(countries, stock, weather, start_year, end_year, time_interval = 'year'):
    """Correlate one stock with one weather metric across several countries.

    Parameters
    ----------
    countries : iterable of str
        Countries whose capital's weather is fetched with `find_weather`.
    stock : str
        Ticker symbol, fetched once with `find_stock_price`.
    weather : str
        Weather metric name passed to `find_weather`.
    start_year, end_year : int
        Year range used for both the stock and the weather series.
    time_interval : {'year', 'month'}
        Granularity used for both series.

    Returns
    -------
    list of list
        One `[country, stock, weather, r]` row per country, where `r` is the
        correlation coefficient from `scipy.stats.linregress`.
    """
    stock_data = find_stock_price(stock, start_year, end_year, time_interval)
    stock_price = [float(item[1]) for item in stock_data]

    out = []
    for country in countries:
        # find_weather takes the year range first; the location and interval
        # are passed by keyword so they cannot land in the wrong slot.
        weather_data = find_weather(start_year, end_year, weather, country = country, time_interval = time_interval)
        weather_vals = [float(item[1]) for item in weather_data]

        out.append([country, stock, weather, stats.linregress(stock_price, weather_vals)[2]])

    return out

In [13]:
def find_cor_weather_list(country, stock, weathers, start_year, end_year, time_interval = 'year'):
    """Correlate one stock with several weather metrics of one country's capital.

    Parameters
    ----------
    country : str
        Country whose capital's weather is fetched with `find_weather`.
    stock : str
        Ticker symbol, fetched once with `find_stock_price`.
    weathers : iterable of str
        Weather metric names passed to `find_weather` one at a time.
    start_year, end_year : int
        Year range used for both the stock and the weather series.
    time_interval : {'year', 'month'}
        Granularity used for both series.

    Returns
    -------
    list of list
        One `[country, stock, weather, r]` row per metric, where `r` is the
        correlation coefficient from `scipy.stats.linregress`.
    """
    # Fetch the prices at the requested granularity so they line up with the
    # weather series fetched below at the same granularity.
    stock_data = find_stock_price(stock, start_year, end_year, time_interval)
    stock_price = [float(item[1]) for item in stock_data]

    out = []
    for weather in weathers:
        # find_weather takes the year range first; location and interval go by keyword.
        weather_data = find_weather(start_year, end_year, weather, country = country, time_interval = time_interval)
        weather_vals = [float(item[1]) for item in weather_data]

        out.append([country, stock, weather, stats.linregress(stock_price, weather_vals)[2]])
    return out

$\textbf{Main Function}$

Finds the correlation between a stock's yearly price and each weather metric over same-length windows shifted back 0, 1, 2, … years, and shows a regression scatterplot for every strongly correlated pair

In [14]:
def historical_cor(country, stock, start_year, end_year, time_interval = 'year'):
    """Plot lagged weather-vs-stock correlations for one country's capital.

    The stock's yearly prices for `start_year`..`end_year` are compared with
    the yearly weather of the same-length window shifted back by
    0, 1, ..., (end_year - start_year - 1) years. A regression scatterplot is
    shown for every (lag, metric) pair whose |r| exceeds 0.8.

    NOTE(review): a later cell in this notebook defines `historical_cor` again
    with a different signature; on Run All that definition replaces this one.

    Parameters
    ----------
    country : str
        Country whose capital's weather is loaded via `Weather`.
    stock : str
        Ticker symbol, loaded via `Stock`.
    start_year, end_year : int
        Year range of the stock prices.
    time_interval : str
        Accepted but not used; only yearly series are compared.

    Returns
    -------
    None
        Plots are displayed with `plt.show()`.
    """
    rewind = end_year - start_year
    stock_prices = Stock(stock, start_year, end_year).get_yearly_price()

    for i in range(0, rewind):
        window_start, window_end = start_year - i, end_year - i
        # `Weather` takes the year range first, then the location.
        yearly_weather = Weather(window_start, window_end, country = country).get_y()

        for weather_type, weather_vals in yearly_weather.items():
            # linregress needs two equally long series of at least two points;
            # skip windows where the weather data does not cover every stock year.
            if len(weather_vals) != len(stock_prices) or len(stock_prices) < 2:
                continue
            # Compute r once and reuse the series already loaded, instead of
            # re-downloading the same data for a second check and for the plot.
            r = stats.linregress(stock_prices, weather_vals)[2]
            if abs(r) > 0.8:
                capital = list(cap_latlong.loc[cap_latlong['Country'] == country, 'Capital'])[0]
                ax = sns.regplot(x = stock_prices, y = weather_vals)
                ax.set_xlabel(stock + ' Price from ' + str(start_year) + '-' + str(end_year))
                ax.set_ylabel(weather_type + ' in ' + capital + ', ' + country + ' from ' + str(window_start) + '-' + str(window_end))
                ax.set_title(weather_type + ' in ' + capital + ' ' + str(i) + ' years prior to ' + stock + ' stock')
                plt.show()

In [15]:
def historical_cor_list(countries, stock, start_year, end_year, time_interval = 'year'):
    """Plot lagged weather-vs-stock correlations for several countries' capitals.

    For every country, the stock's yearly prices for `start_year`..`end_year`
    are compared with the yearly weather of the same-length window shifted
    back by 0, 1, ..., (end_year - start_year - 1) years. A regression
    scatterplot is shown for every (country, lag, metric) combination whose
    |r| exceeds 0.9.

    Parameters
    ----------
    countries : iterable of str
        Countries whose capital's weather is loaded via `Weather`.
    stock : str
        Ticker symbol, loaded once via `Stock`.
    start_year, end_year : int
        Year range of the stock prices.
    time_interval : str
        Accepted but not used; only yearly series are compared.

    Returns
    -------
    None
        Plots are displayed with `plt.show()`.
    """
    rewind = end_year - start_year
    stock_prices = Stock(stock, start_year, end_year).get_yearly_price()

    for country in countries:

        for i in range(0, rewind):
            window_start, window_end = start_year - i, end_year - i
            # `Weather` takes the year range first, then the location.
            yearly_weather = Weather(window_start, window_end, country = country).get_y()

            for weather_type, weather_vals in yearly_weather.items():
                # linregress needs two equally long series of at least two points;
                # skip windows where the weather data does not cover every stock year.
                if len(weather_vals) != len(stock_prices) or len(stock_prices) < 2:
                    continue
                # Compute r once and reuse the series already loaded, instead of
                # re-downloading the same data for the check, the plot and the title.
                r = stats.linregress(stock_prices, weather_vals)[2]
                if abs(r) > 0.9:
                    capital = list(cap_latlong.loc[cap_latlong['Country'] == country, 'Capital'])[0]
                    ax = sns.regplot(x = stock_prices, y = weather_vals)
                    ax.set_xlabel(stock + ' Price from ' + str(start_year) + '-' + str(end_year))
                    ax.set_ylabel(weather_type + ' in ' + capital + ', ' + country + ' from ' + str(window_start) + '-' + str(window_end))
                    ax.set_title(weather_type + ' in ' + capital + ' ' + str(i) + ' years prior to ' + stock + ' stock; cor = ' + str(r))
                    plt.show()

In [47]:
def historical_cor(stock, start_year, end_year, countries = [], latitude = [], longitude = [], time_interval = 'year'):
    """Plot lagged weather-vs-stock correlations for capitals and/or coordinates.

    The stock's yearly prices for `start_year`..`end_year` are compared with
    the yearly weather of the same-length window shifted back by
    0, 1, ..., (end_year - start_year - 1) years, for every listed country
    capital and every (latitude[k], longitude[k]) pair. A regression
    scatterplot is shown for every combination whose |r| exceeds 0.9.

    Parameters
    ----------
    stock : str
        Ticker symbol, loaded once via `Stock`.
    start_year, end_year : int
        Year range of the stock prices.
    countries : sequence of str
        Countries to scan; names missing from `cap_latlong['Country']` are skipped.
    latitude, longitude : sequence of float
        Parallel sequences of coordinates to scan. The default lists are
        never mutated here.
    time_interval : str
        Forwarded to `Weather`; only yearly series are compared.

    Returns
    -------
    None
        Plots are displayed with `plt.show()`.

    Raises
    ------
    ValueError
        If `latitude` and `longitude` differ in length.
    """
    # Unpaired coordinates used to be ignored silently; report them instead.
    if len(latitude) != len(longitude):
        raise ValueError('latitude and longitude must have the same length')

    rewind = end_year - start_year
    stock_prices = Stock(stock, start_year, end_year).get_yearly_price()

    def scan(ylabel_place, title_place, **where):
        """Check every lag and metric for one location and plot the strong correlations."""
        for lag in range(0, rewind):
            window_start, window_end = start_year - lag, end_year - lag
            yearly_weather = Weather(window_start, window_end, time_interval = time_interval, **where).get_y()
            for weather_type, weather_vals in yearly_weather.items():
                # linregress needs two equally long series of at least two points;
                # skip windows where the weather data does not cover every stock year.
                if len(weather_vals) != len(stock_prices) or len(stock_prices) < 2:
                    continue
                # Compute r once and reuse the series already loaded, instead of
                # re-downloading the same data for the plot and the title.
                r = stats.linregress(stock_prices, weather_vals)[2]
                if abs(r) > 0.9:
                    ax = sns.regplot(x = stock_prices, y = weather_vals)
                    ax.set_xlabel(stock + ' Price from ' + str(start_year) + '-' + str(end_year))
                    ax.set_ylabel(weather_type + ylabel_place + ' from ' + str(window_start) + '-' + str(window_end))
                    ax.set_title(weather_type + title_place + ' ' + str(lag) + ' years prior to ' + stock + ' stock; cor = ' + str(r))
                    plt.show()

    known_countries = set(cap_latlong['Country'])
    for country in countries:
        if country in known_countries:
            capital = list(cap_latlong.loc[cap_latlong['Country'] == country, 'Capital'])[0]
            scan(' in ' + capital + ', ' + country, ' in ' + capital, country = country)

    # Each coordinate pair is scanned on its own: the lag, not the location
    # index, shifts the window, and labels name the coordinates themselves.
    for lat, lon in zip(latitude, longitude):
        place = ' at (' + str(lat) + ', ' + str(lon) + ')'
        scan(place, place, latitude = lat, longitude = lon)