In [127]:
import pandas as pd
import numpy as np
import glob
import datetime as dt
import pandas_datareader.data as web
import quandl

In [128]:
data_folder = "data"
csvDataFiles = glob.glob(data_folder + "/*.csv")
dataFiles = []
for dataFile in csvDataFiles:
    df = pd.read_csv(dataFile)
    dataFiles.append(df)

In [129]:
#The number of stocks we have in our data folder
print(len(dataFiles))

30


In [130]:
#This is what our data looks like
dataFiles[0].head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-10-02,52.16,52.209999,51.66,51.869999,51.542397,7516200
1,2017-10-03,52.0,52.049999,51.400002,51.470001,51.144924,8126700
2,2017-10-04,51.509998,52.130001,51.400002,52.080002,51.751072,8204300
3,2017-10-05,52.009998,52.32,51.779999,52.18,51.850441,5287400
4,2017-10-06,52.200001,52.459999,52.119999,52.419998,52.088921,5360300


In [131]:
#This is what the end of the data looks like
#We can see it goes from 10/2/17 - 03/29/18
dataFiles[0].tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
119,2018-03-23,66.699997,67.110001,64.459999,64.629997,64.629997,17343800
120,2018-03-26,65.080002,66.059998,64.989998,65.900002,65.900002,10796000
121,2018-03-27,65.889999,67.639999,65.43,66.169998,66.169998,9452500
122,2018-03-28,66.589996,67.150002,65.269997,65.440002,65.440002,7208400
123,2018-03-29,65.699997,66.550003,65.610001,66.440002,66.440002,10405800


In [132]:
#The dimensions of the stock data -- 124 rows and 7 columns
dataFiles[0].shape

(124, 7)

In [133]:
#Lets store the ratings in a form we can understand
ratings_folder = "Ratings"
xlsxRatingFiles = glob.glob(ratings_folder + "/*.xlsx")
ratingsFiles = []
for ratingFile in xlsxRatingFiles:
    rf = pd.ExcelFile(ratingFile)
    ratingsFiles.append(rf.parse())

In [134]:
#The number of ratings files we have in our ratings folder
print(len(ratingsFiles))

30


In [135]:
#This is what our ratings look like
ratingsFiles[0].head()

Unnamed: 0,0,1,2,3
5,1,1,1,1
4,0,0,0,0
3,7,7,7,7
2,0,0,0,0
1,0,0,0,0


In [136]:
#The dimensions of the ratings data -- 5 rows and 4 columns
ratingsFiles[0].shape

(5, 4)

In [137]:
#Let's now find the stock market returns for each of the last 3 months of our data
#as these are the months that we have ratings for
start3MonthsAgo = dt.datetime(2018, 1, 1)
end2MonthsAgo = dt.datetime(2018, 1, 31)

start2MonthsAgo = dt.datetime(2018, 2, 1)
end1MonthAgo = dt.datetime(2018, 2, 28)

start1MonthAgo = dt.datetime(2018, 3, 1)
end1MonthAgo = dt.datetime(2018, 3, 29)

nasdaqData1MonthAgo = quandl.get("NASDAQOMX/COMP-NASDAQ", trim_start=start1MonthAgo, trim_end=end1MonthAgo)

#Don't execute the next 2 lines until later, otherwise the API will be overloaded and block you
#nasdaqData2MonthsAgo = quandl.get("NASDAQOMX/COMP-NASDAQ", trim_start=end2MonthsAgo, trim_end=end1MonthAgo)
#nasdaqData3MonthsAgo = quandl.get("NASDAQOMX/COMP-NASDAQ", trim_start=start3MonthsAgo, trim_end=end2MonthsAgo)

#Let's see what our data looks like
nasdaqData1MonthAgo.head()

Unnamed: 0_level_0,Index Value,High,Low,Total Market Value,Dividend Market Value
Trade Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-03-01,7180.56,7307.84,7117.66,10964330000000.0,1412534000.0
2018-03-02,7257.87,7267.19,7084.83,11082540000000.0,551955500.0
2018-03-05,7330.7,7350.07,7205.31,11193750000000.0,24234150.0
2018-03-06,7372.01,7378.03,7319.68,11257030000000.0,151957600.0
2018-03-07,7396.65,7403.79,7311.74,11276350000000.0,75352490.0


In [138]:
#Lets drop everything but the index value as that's all we will care about
nasdaqData1MonthAgo = nasdaqData1MonthAgo[['Index Value']]
nasdaqData1MonthAgo.head()

Unnamed: 0_level_0,Index Value
Trade Date,Unnamed: 1_level_1
2018-03-01,7180.56
2018-03-02,7257.87
2018-03-05,7330.7
2018-03-06,7372.01
2018-03-07,7396.65


In [139]:
firstDay = 0
initPrice = nasdaqData1MonthAgo.iloc[firstDay]['Index Value']

lastDay = nasdaqData1MonthAgo.shape[0] - 1
finalPrice = nasdaqData1MonthAgo.iloc[lastDay]['Index Value']

#Market growth
oneMonthAgoROI = (finalPrice - initPrice) / initPrice
print(oneMonthAgoROI)

-0.016310705571710396


In [141]:
#Let's do the same thing for the 2 earlier months
nasdaqData2MonthsAgo = quandl.get("NASDAQOMX/COMP-NASDAQ", trim_start=end2MonthsAgo, trim_end=end1MonthAgo)
nasdaqData2MonthsAgo = nasdaqData2MonthAgo[['Index Value']]

firstDay = 0
initPrice = nasdaqData2MonthsAgo.iloc[firstDay]['Index Value']

lastDay = nasdaqData2MonthsAgo.shape[0] - 1
finalPrice = nasdaqData2MonthsAgo.iloc[lastDay]['Index Value']

#Market growth
twoMonthsAgoROI = (finalPrice - initPrice) / initPrice
print(twoMonthsAgoROI)

-0.05398372197988091


In [142]:
#And for 3rd month ago
nasdaqData3MonthsAgo = quandl.get("NASDAQOMX/COMP-NASDAQ", trim_start=start3MonthsAgo, trim_end=end2MonthsAgo)
nasdaqData23MonthsAgo = nasdaqData3MonthsAgo[['Index Value']]

firstDay = 0
initPrice = nasdaqData3MonthsAgo.iloc[firstDay]['Index Value']

lastDay = nasdaqData3MonthsAgo.shape[0] - 1
finalPrice = nasdaqData3MonthsAgo.iloc[lastDay]['Index Value']

#Market growth
threeMonthsAgoROI = (finalPrice - initPrice) / initPrice
print(threeMonthsAgoROI)

0.057740227490045516


In [143]:
#Let's see how to professionals' ratings for specific stocks compared to the market returns

#The NASDAQ growth looks like this:
#01/01/18 - 01/31/18 : 5.7740227490045516 %
#02/01/18 - 02/28/18 : -5.398372197988091 %
#03/01/18 - 03/29/18 : -1.6310705571710396 %

#We don't need the current rating, so we'll drop it from each of the ratings data frames
#Then, we can calculate the avg rating for one month ago for each stock

avgRating1MonthAgo = []
avgRating2MonthsAgo = []
avgRating3MonthsAgo = []

    #for ratingFile in ratingsFiles:

        #remove current rating
        #del ratingFile[0]

for ratingFile in ratingsFiles:
    avgRating = 0
    xMonthAgo = 1
    totalReviewers = 0

    #Aggreate the ratings for the month
    #ratingFile.iloc[5 - rating][month]
    for index in range(len(ratingFile)):
        rating = 5 - index
        reviewers = ratingFile.iloc[index][xMonthAgo]
        totalReviewers += reviewers
        avgRating += rating * reviewers

        #set reviewers to 0 for next iteration
        reviewers = 0
    avgRating /= float(totalReviewers)
    avgRating1MonthAgo.append(avgRating)

#same thing for the 2nd month ago
for ratingFile in ratingsFiles:
    avgRating = 0
    xMonthAgo = 2
    totalReviewers = 0

    #Aggreate the ratings for the month
    #ratingFile.iloc[5 - rating][month]
    for index in range(len(ratingFile)):
        rating = 5 - index
        reviewers = ratingFile.iloc[index][xMonthAgo]
        totalReviewers += reviewers
        avgRating += rating * reviewers

        #set reviewers to 0 for next iteration
        reviewers = 0
    avgRating /= float(totalReviewers)
    avgRating2MonthsAgo.append(avgRating)

#And again for the 3rd month ago
for ratingFile in ratingsFiles:
    avgRating = 0
    xMonthAgo = 3
    totalReviewers = 0

    #Aggreate the ratings for the month
    #ratingFile.iloc[5 - rating][month]
    for index in range(len(ratingFile)):
        rating = 5 - index
        reviewers = ratingFile.iloc[index][xMonthAgo]
        totalReviewers += reviewers
        avgRating += rating * reviewers

        #set reviewers to 0 for next iteration
        reviewers = 0
    avgRating /= float(totalReviewers)
    avgRating3MonthsAgo.append(avgRating)
    

In [None]:
#Now, let's create a data frame with 2 dimensions: the average ranking at the beginning of the month for each specific stock
#and the sign of the difference of the NASDAQ growth and the stock growth

stockROIsMarch = []
#This is for the month of March
for index in range(len(dataFiles)):
    
    stockROI = 0
    
    firstRowIndex = 103
    openIndex = 1
    March1Open = dataFiles[0].iloc[firstRowIndex][openIndex]

    lastRowIndex = dataFiles[0].shape[0] - 1
    closeIndex = 4
    March29Close = dataFiles[0].iloc[lastRowIndex][closeIndex]
    
    stockROI = (March29Close - March1Open) / March1Open
    stockROIsMarch.append(stockROI)
    
stockROIsFebruary = []
#now for feb
for index in range(len(dataFiles)):
    
    stockROI = 0
    
    firstRowIndex = 84
    openIndex = 1
    Feb1Open = dataFiles[0].iloc[firstRowIndex][openIndex]

    lastRowIndex = 102
    closeIndex = 4
    Feb28Close = dataFiles[0].iloc[lastRowIndex][closeIndex]
    
    stockROI = (Feb28Close - Feb1Open) / Feb1Open
    stockROIsFebruary.append(stockROI)
    
stockROIsJanuary = []
#This is for the month of March
for index in range(len(dataFiles)):
    
    stockROI = 0
    
    firstRowIndex = 63
    openIndex = 1
    Feb1Open = dataFiles[0].iloc[firstRowIndex][openIndex]

    lastRowIndex = 83
    closeIndex = 4
    Feb28Close = dataFiles[0].iloc[lastRowIndex][closeIndex]
    
    stockROI = (Feb28Close - Feb1Open) / Feb1Open
    stockROIsFebruary.append(stockROI)
