In [1]:
import pandas as pd
import numpy as np
import os
import pymysql
from sqlalchemy import create_engine

In [2]:
# Load the full 2004 quote table from the local MySQL `stockdata` database.
# The connection URL (which carries the user name and password) is read from the
# environment so that no credentials are stored in the notebook. Expected form:
#   mysql+pymysql://<user>:<password>@localhost:3306/stockdata
db_url = os.environ.get('STOCKDATA_DB_URL')
if not db_url:
    raise RuntimeError(
        "Environment variable STOCKDATA_DB_URL is not set; expected something like "
        "mysql+pymysql://<user>:<password>@localhost:3306/stockdata"
    )
engine = create_engine(db_url)
sqlstr = 'select * from data2004'
data2004 = pd.read_sql_query(sqlstr, engine)

# 2004: Month Before & Month After

In [3]:
# Month window around election day (Tuesday 2 November 2004):
# Friday 1 October 2004 through Thursday 2 December 2004.
# All of October and November is matched on the 'YYYY-MM' prefix of the date
# string; the first two days of December are matched on the full 'YYYY-MM-DD' prefix.
date_text = data2004['Date']
in_oct_or_nov = date_text.str[:7].isin(['2004-10', '2004-11'])
in_first_dec_days = date_text.str[:10].isin(['2004-12-01', '2004-12-02'])
datamonth = data2004.loc[in_oct_or_nov | in_first_dec_days]
datamonth

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt,Name
188,2004-10-01,14.647,15.546,14.647,15.520,3966733,0,\a.us
189,2004-10-04,15.797,16.052,15.667,15.891,4810241,0,\a.us
190,2004-10-05,15.863,15.945,15.513,15.667,3889111,0,\a.us
191,2004-10-06,15.560,15.573,15.323,15.540,3533871,0,\a.us
192,2004-10-07,15.425,15.546,15.323,15.344,3296549,0,\a.us
...,...,...,...,...,...,...,...,...
91374,2004-11-26,64.863,64.908,64.533,64.591,84832,0,\zion.us
91375,2004-11-29,64.227,64.246,63.380,63.542,754962,0,\zion.us
91376,2004-11-30,63.321,64.014,62.975,64.014,565289,0,\zion.us
91377,2004-12-01,64.227,65.277,63.880,65.277,294843,0,\zion.us


In [4]:
# Per-ticker summary statistics over the month window (2004-10-01 .. 2004-12-02).
# The six parallel lists built here are turned into a DataFrame by DFcreation
# in a later cell.
ticker = []
vol_ratio = []
mean_close = []
std_close = []
begin_prices = []
close_prices = []

first_day = '2004-10-01'
last_day = '2004-12-02'

# One pass over the frame; sort=False keeps tickers in order of first appearance,
# the same order Series.unique() would give.
for stockname, datastock in datamonth.groupby('Name', sort=False):
    ticker.append(stockname)

    closes = datastock['Close']
    std = closes.std(ddof=0)  # population std (ddof=0), i.e. what np.std computes
    mean = closes.mean()
    std_close.append(std)
    mean_close.append(mean)
    # Volatility ratio: std of the close as a percentage of its mean.
    vol_ratio.append(std / mean * 100)

    # Close on each boundary day, extracted explicitly instead of relying on the
    # implicit single-element Series -> float conversion of np.float64(Series).
    # A ticker with no quote on that day keeps the empty-array placeholder, so
    # the '[]' filter applied after DFcreation still drops it. If a day had
    # several rows for one ticker, the first one is used.
    for day, prices in ((first_day, begin_prices), (last_day, close_prices)):
        on_day = closes[datastock['Date'] == day]
        if len(on_day):
            prices.append(np.float64(on_day.iloc[0]))
        else:
            prices.append(np.array([], dtype=float))

In [5]:
def DFcreation(names, beginprices, closeprices, meanclose, stdclose, volratios):
    """Assemble the per-ticker summary table from parallel sequences.

    Parameters (all sequences of equal length, one entry per ticker):
        names       -- ticker identifiers          -> column 'Ticker'
        beginprices -- price at the window start   -> column 'OpeningPrice'
        closeprices -- price at the window end     -> column 'ClosingPrice'
        meanclose   -- mean close over the window  -> column 'MeanPrice'
        stdclose    -- std of the close            -> column 'StdPrice'
        volratios   -- std/mean in percent         -> column '%_VolatilityRatio'

    Returns a DataFrame with those columns plus '%_Gain/Lose', the percentage
    change from OpeningPrice to ClosingPrice.
    """
    summary = pd.DataFrame({
        'Ticker': names,
        'OpeningPrice': beginprices,
        'ClosingPrice': closeprices,
        'MeanPrice': meanclose,
        'StdPrice': stdclose,
        '%_VolatilityRatio': volratios,
    })
    price_change = summary['ClosingPrice'] - summary['OpeningPrice']
    summary['%_Gain/Lose'] = (price_change / summary['OpeningPrice']) * 100
    return summary

In [7]:
# Month summary table: one row per ticker.
df = DFcreation(ticker, begin_prices, close_prices, mean_close, std_close, vol_ratio)
# Rows whose gain renders as '[]' had no quote on a boundary day (the price
# placeholder is an empty array); only that one column needs the string test.
has_both_quotes = df['%_Gain/Lose'].astype(str) != '[]'
# .loc + astype builds a fresh frame instead of assigning into a filtered slice
# (which raises SettingWithCopyWarning), and makes OpeningPrice numeric like
# ClosingPrice and the gain column.
df = df.loc[has_both_quotes].astype({
    'OpeningPrice': 'float',
    'ClosingPrice': 'float',
    '%_Gain/Lose': 'float',
})

In [8]:
# Cross-ticker averages for the month window.
vol_month_mean = df['%_VolatilityRatio'].mean()
GL_month_mean = df['%_Gain/Lose'].mean()

vol_month_shown = round(vol_month_mean, 2)
GL_month_shown = round(GL_month_mean, 2)
print("The mean volatility ratio from 2004/10/01 until 2004/12/02:", vol_month_shown, "%",
      "\nThe mean Gain or Lose ratio 2004/10/01 until 2004/12/02:", GL_month_shown, "%")

The mean volatility ratio from 2004/10/01 until 2004/12/02: 4.97 % 
The mean Gain or Lose ratio 2004/10/01 until 2004/12/02: 8.41 %


# 2004: Week Before & Week After

In [9]:
# Week window around election day: every calendar day from 2004-10-26 through
# 2004-11-09 (15 days), matched on the 'YYYY-MM-DD' prefix of the date string.
week_days = pd.date_range('2004-10-26', '2004-11-09').strftime('%Y-%m-%d')
dataweek = data2004.loc[data2004['Date'].str[:10].isin(week_days)]
dataweek

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt,Name
205,2004-10-26,16.141,16.423,15.945,16.423,2499266,0,\a.us
206,2004-10-27,16.355,16.945,16.295,16.862,3283513,0,\a.us
207,2004-10-28,16.862,17.077,16.647,16.740,2567999,0,\a.us
208,2004-10-29,16.740,17.058,16.686,16.917,3034340,0,\a.us
209,2004-11-01,16.843,17.011,16.606,16.681,2580887,0,\a.us
...,...,...,...,...,...,...,...,...
91358,2004-11-03,63.264,64.323,63.235,63.916,453765,0,\zion.us
91359,2004-11-04,63.737,64.543,63.437,64.495,395064,0,\zion.us
91360,2004-11-05,64.807,65.014,63.041,63.907,756908,0,\zion.us
91361,2004-11-08,63.737,63.935,63.311,63.427,370750,0,\zion.us


In [10]:
# Per-ticker summary statistics over the week window (2004-10-26 .. 2004-11-09).
# The six parallel lists built here are turned into a DataFrame by DFcreation
# in a later cell.
ticker = []
vol_ratio = []
mean_close = []
std_close = []
begin_prices = []
close_prices = []

first_day = '2004-10-26'
last_day = '2004-11-09'

# One pass over the frame; sort=False keeps tickers in order of first appearance,
# the same order Series.unique() would give.
for stockname, datastock in dataweek.groupby('Name', sort=False):
    ticker.append(stockname)

    closes = datastock['Close']
    std = closes.std(ddof=0)  # population std (ddof=0), i.e. what np.std computes
    mean = closes.mean()
    std_close.append(std)
    mean_close.append(mean)
    # Volatility ratio: std of the close as a percentage of its mean.
    vol_ratio.append(std / mean * 100)

    # Close on each boundary day, extracted explicitly instead of relying on the
    # implicit single-element Series -> float conversion of np.float64(Series).
    # A ticker with no quote on that day keeps the empty-array placeholder, so
    # the '[]' filter applied after DFcreation still drops it. If a day had
    # several rows for one ticker, the first one is used.
    for day, prices in ((first_day, begin_prices), (last_day, close_prices)):
        on_day = closes[datastock['Date'] == day]
        if len(on_day):
            prices.append(np.float64(on_day.iloc[0]))
        else:
            prices.append(np.array([], dtype=float))

In [13]:
# Week summary table: one row per ticker.
dfweek = DFcreation(ticker, begin_prices, close_prices, mean_close, std_close, vol_ratio)
# Rows whose gain renders as '[]' had no quote on a boundary day (the price
# placeholder is an empty array); only that one column needs the string test.
has_both_quotes = dfweek['%_Gain/Lose'].astype(str) != '[]'
# .loc + astype builds a fresh frame instead of assigning into a filtered slice
# (which raises SettingWithCopyWarning), and makes OpeningPrice numeric like
# ClosingPrice and the gain column.
dfweek = dfweek.loc[has_both_quotes].astype({
    'OpeningPrice': 'float',
    'ClosingPrice': 'float',
    '%_Gain/Lose': 'float',
})

In [14]:
# Cross-ticker averages for the week window.
vol_week_mean = dfweek['%_VolatilityRatio'].mean()
GL_week_mean = dfweek['%_Gain/Lose'].mean()

vol_week_shown = round(vol_week_mean, 2)
GL_week_shown = round(GL_week_mean, 2)
print("The mean volatility ratio from 2004/10/26 until 2004/11/09:", vol_week_shown, "%",
      "\nThe mean Gain or Lose ratio 2004/10/26 until 2004/11/09:", GL_week_shown, "%")

The mean volatility ratio from 2004/10/26 until 2004/11/09: 2.56 % 
The mean Gain or Lose ratio 2004/10/26 until 2004/11/09: 5.17 %


# 2004: Day Before & Day After

In [25]:
# Day window: the day before, the day of, and the day after the election,
# matched on the 'YYYY-MM-DD' prefix of the date string.
day_window = ['2004-11-01', '2004-11-02', '2004-11-03']
dataday = data2004.loc[data2004['Date'].str[:10].isin(day_window)]
dataday

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt,Name
209,2004-11-01,16.843,17.011,16.606,16.681,2580887,0,\a.us
210,2004-11-02,16.518,17.139,16.458,16.822,3346181,0,\a.us
211,2004-11-03,17.455,17.455,16.579,16.768,4780763,0,\a.us
461,2004-11-01,68.175,68.904,68.049,68.631,1675783,0,\aa.us
462,2004-11-02,68.631,68.651,66.917,66.979,2344679,0,\aa.us
...,...,...,...,...,...,...,...,...
91105,2004-11-02,73.541,73.810,71.881,72.214,1787473,0,\zbh.us
91106,2004-11-03,73.968,74.055,72.595,73.895,2287503,0,\zbh.us
91356,2004-11-01,63.629,64.005,63.512,63.803,440800,0,\zion.us
91357,2004-11-02,63.794,64.034,63.226,63.273,527965,0,\zion.us


In [26]:
# Per-ticker summary statistics over the day window (2004-11-01 .. 2004-11-03).
# The six parallel lists built here are turned into a DataFrame by DFcreation
# in a later cell.
ticker = []
vol_ratio = []
mean_close = []
std_close = []
begin_prices = []
close_prices = []

first_day = '2004-11-01'
last_day = '2004-11-03'

# One pass over the frame; sort=False keeps tickers in order of first appearance,
# the same order Series.unique() would give.
for stockname, datastock in dataday.groupby('Name', sort=False):
    ticker.append(stockname)

    closes = datastock['Close']
    std = closes.std(ddof=0)  # population std (ddof=0), i.e. what np.std computes
    mean = closes.mean()
    std_close.append(std)
    mean_close.append(mean)
    # Volatility ratio: std of the close as a percentage of its mean.
    vol_ratio.append(std / mean * 100)

    # Close on each boundary day, extracted explicitly instead of relying on the
    # implicit single-element Series -> float conversion of np.float64(Series).
    # A ticker with no quote on that day keeps the empty-array placeholder, so
    # the '[]' filter applied after DFcreation still drops it. If a day had
    # several rows for one ticker, the first one is used.
    for day, prices in ((first_day, begin_prices), (last_day, close_prices)):
        on_day = closes[datastock['Date'] == day]
        if len(on_day):
            prices.append(np.float64(on_day.iloc[0]))
        else:
            prices.append(np.array([], dtype=float))

In [27]:
# Day summary table: one row per ticker.
dfday = DFcreation(ticker, begin_prices, close_prices, mean_close, std_close, vol_ratio)
# Rows whose gain renders as '[]' had no quote on a boundary day (the price
# placeholder is an empty array); only that one column needs the string test.
has_both_quotes = dfday['%_Gain/Lose'].astype(str) != '[]'
# .loc + astype builds a fresh frame instead of assigning into a filtered slice
# (which raises SettingWithCopyWarning), and makes OpeningPrice numeric like
# ClosingPrice and the gain column.
dfday = dfday.loc[has_both_quotes].astype({
    'OpeningPrice': 'float',
    'ClosingPrice': 'float',
    '%_Gain/Lose': 'float',
})
dfday

Unnamed: 0,Ticker,OpeningPrice,ClosingPrice,MeanPrice,StdPrice,%_VolatilityRatio,%_Gain/Lose
0,\a.us,16.681,16.7680,16.757000,0.058086,0.346638,0.521551
1,\aa.us,68.631,67.0200,67.543333,0.769279,1.138941,-2.347336
2,\aaba.us,36.92,37.9700,37.543333,0.450654,1.200357,2.843987
3,\aapl.us,3.3592,3.5422,3.442367,0.075633,2.197129,5.447726
4,\abc.us,12.414,12.8140,12.666333,0.179288,1.415470,3.222169
...,...,...,...,...,...,...,...
360,\xray.us,25.036,25.2420,25.079333,0.119134,0.475028,0.822815
361,\xrx.us,51.617,53.2110,52.271333,0.681308,1.303406,3.088130
362,\yum.us,13.704,13.9240,13.758667,0.119123,0.865801,1.605371
363,\zbh.us,72.424,73.8950,72.844333,0.747864,1.026660,2.031095


In [29]:
# Cross-ticker averages for the day window.
vol_day_mean = dfday['%_VolatilityRatio'].mean()
GL_day_mean = dfday['%_Gain/Lose'].mean()

vol_day_shown = round(vol_day_mean, 2)
GL_day_shown = round(GL_day_mean, 2)
print("The mean volatility ratio from 2004/11/01 until 2004/11/03:", vol_day_shown, "%",
      "\nThe mean Gain or Lose ratio 2004/11/01 until 2004/11/03:", GL_day_shown, "%")

The mean volatility ratio from 2004/11/01 until 2004/11/03: 1.03 % 
The mean Gain or Lose ratio 2004/11/01 until 2004/11/03: 1.04 %


In [30]:
# One row per window length (day / week / month) with the cross-ticker mean
# volatility ratio and mean gain/loss for 2004.
df2004 = pd.DataFrame({
    'Year': [2004, 2004, 2004],
    'Period': ['Day', 'Week', 'Month'],
    '%_Vol_Mean': [vol_day_mean, vol_week_mean, vol_month_mean],
    '%_GainLose_Mean': [GL_day_mean, GL_week_mean, GL_month_mean],
})
df2004

Unnamed: 0,Year,Period,%_Vol_Mean,%_GainLose_Mean
0,2004,Day,1.025645,1.044233
1,2004,Week,2.564266,5.16795
2,2004,Month,4.965819,8.409113


In [31]:
# Write the 2004 day/week/month summary table to 'df2004.csv' (path relative to
# the working directory); index=False leaves the row index out of the file.
df2004.to_csv('df2004.csv', index=False)