In [78]:
import pandas as pd
import numpy as np
import glob
import datetime as dt
import pandas_datareader.data as web
import quandl

In [79]:
#Load every CSV in the data folder into its own DataFrame.
#glob returns paths in arbitrary (OS-dependent) order, so the paths are sorted
#to make dataFiles[i] refer to the same file on every run and every machine.
data_folder = "data"
csvDataFiles = sorted(glob.glob(data_folder + "/*.csv"))
dataFiles = [pd.read_csv(dataFile) for dataFile in csvDataFiles]

In [80]:
#The number of stocks we have in our data folder
#(dataFiles holds one DataFrame per CSV file that was found there)
print(len(dataFiles))

30


In [81]:
#This is what our data looks like: the first rows of the first loaded stock file
dataFiles[0].head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-10-02,52.16,52.209999,51.66,51.869999,51.542397,7516200
1,2017-10-03,52.0,52.049999,51.400002,51.470001,51.144924,8126700
2,2017-10-04,51.509998,52.130001,51.400002,52.080002,51.751072,8204300
3,2017-10-05,52.009998,52.32,51.779999,52.18,51.850441,5287400
4,2017-10-06,52.200001,52.459999,52.119999,52.419998,52.088921,5360300


In [82]:
#This is what the end of the data looks like (the last rows of the first loaded stock file)
#We can see it goes from 10/2/17 - 03/29/18
dataFiles[0].tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
119,2018-03-23,66.699997,67.110001,64.459999,64.629997,64.629997,17343800
120,2018-03-26,65.080002,66.059998,64.989998,65.900002,65.900002,10796000
121,2018-03-27,65.889999,67.639999,65.43,66.169998,66.169998,9452500
122,2018-03-28,66.589996,67.150002,65.269997,65.440002,65.440002,7208400
123,2018-03-29,65.699997,66.550003,65.610001,66.440002,66.440002,10405800


In [83]:
#The dimensions of the first stock's data -- 124 rows and 7 columns
dataFiles[0].shape

(124, 7)

In [84]:
#Let's store the ratings in a form we can understand: one DataFrame per workbook.
#Paths are sorted because glob returns them in arbitrary (OS-dependent) order.
ratings_folder = "Ratings"
xlsxRatingFiles = sorted(glob.glob(ratings_folder + "/*.xlsx"))
ratingsFiles = []
for ratingFile in xlsxRatingFiles:
    #The context manager closes the workbook once it has been parsed,
    #so no file handles are left open behind us
    with pd.ExcelFile(ratingFile) as rf:
        ratingsFiles.append(rf.parse())

In [85]:
#The number of ratings files we have in our ratings folder
#(ratingsFiles holds one DataFrame per .xlsx file that was found there)
print(len(ratingsFiles))

30


In [86]:
#This is what our ratings look like (first rows of the first ratings file)
#NOTE(review): presumably each row is a rating level (5 down to 1) and each column
#is a number of months back, with column 0 being the current rating -- TODO confirm
ratingsFiles[0].head()

Unnamed: 0,0,1,2,3
5,1,1,1,1
4,0,0,0,0
3,7,7,7,7
2,0,0,0,0
1,0,0,0,0


In [87]:
#The dimensions of the first ratings data frame -- 5 rows and 4 columns
ratingsFiles[0].shape

(5, 4)

In [88]:
#Date boundaries for the last 3 monthly windows of our data -- these are the
#months that we have ratings for. Each window starts on the day the previous one ends:
#   12/29/17 - 01/29/18   (start3MonthsAgo -> end2MonthsAgo)
#   01/29/18 - 02/28/18   (start2MonthsAgo -> start1MonthAgo)
#   02/28/18 - 03/29/18   (start1MonthAgo  -> end1MonthAgo)
start3MonthsAgo = dt.datetime(2017, 12, 29)
end2MonthsAgo = dt.datetime(2018, 1, 29)
start2MonthsAgo = end2MonthsAgo

start1MonthAgo = dt.datetime(2018, 2, 28)
end1MonthAgo = dt.datetime(2018, 3, 29)

#Only the most recent window is requested in this cell. The two earlier windows
#are fetched in later cells, because firing all the requests at once can make
#the API reject them for exceeding its speed limit.
nasdaqData1MonthAgo = quandl.get(
    "NASDAQOMX/COMP-NASDAQ",
    trim_start=start1MonthAgo,
    trim_end=end1MonthAgo,
)

#Let's see what our data looks like
nasdaqData1MonthAgo.head()

LimitExceededError: (Status 429) (Quandl Error QELx04) You have exceeded the API speed limit. Please slow down your requests by reducing the number of requests made at the same time.

In [None]:
#Keep only the 'Index Value' column -- it is the only one the return calculation reads
nasdaqData1MonthAgo = nasdaqData1MonthAgo.loc[:, ['Index Value']]
nasdaqData1MonthAgo.head()

In [None]:
#Market growth over the window: the relative change of the index value
#between the first and the last row of the data
indexValues = nasdaqData1MonthAgo['Index Value']

firstDay = 0
initPrice = indexValues.iloc[firstDay]

lastDay = len(nasdaqData1MonthAgo) - 1
finalPrice = indexValues.iloc[lastDay]

oneMonthAgoROI = (finalPrice - initPrice) / initPrice
print(oneMonthAgoROI)

In [None]:
#Let's do the same thing for the middle window (01/29/18 - 02/28/18).
#The window has to end where the most recent one starts (start1MonthAgo, 02/28/18):
#end1MonthAgo holds 03/29/18 by the time this cell runs, so using it as the end
#date would stretch the request over two months instead of one.
nasdaqData2MonthsAgo = quandl.get("NASDAQOMX/COMP-NASDAQ", trim_start=start2MonthsAgo, trim_end=start1MonthAgo)
nasdaqData2MonthsAgo = nasdaqData2MonthsAgo[['Index Value']]

firstDay = 0
initPrice = nasdaqData2MonthsAgo.iloc[firstDay]['Index Value']

lastDay = nasdaqData2MonthsAgo.shape[0] - 1
finalPrice = nasdaqData2MonthsAgo.iloc[lastDay]['Index Value']

#Market growth: relative change between the first and last index value of the window
twoMonthsAgoROI = (finalPrice - initPrice) / initPrice
print(twoMonthsAgoROI)

In [None]:
#And for the earliest window, three months ago (12/29/17 - 01/29/18)
nasdaqData3MonthsAgo = quandl.get("NASDAQOMX/COMP-NASDAQ", trim_start=start3MonthsAgo, trim_end=end2MonthsAgo)
#Keep only the index value, as that's all we care about
nasdaqData3MonthsAgo = nasdaqData3MonthsAgo[['Index Value']]

firstDay = 0
initPrice = nasdaqData3MonthsAgo.iloc[firstDay]['Index Value']

lastDay = nasdaqData3MonthsAgo.shape[0] - 1
finalPrice = nasdaqData3MonthsAgo.iloc[lastDay]['Index Value']

#Market growth: relative change between the first and last index value of the window
threeMonthsAgoROI = (finalPrice - initPrice) / initPrice
print(threeMonthsAgoROI)

In [108]:
#Let's see how the professionals' ratings for specific stocks compared to the market returns

#The NASDAQ growth looks like this:
#12/29/17 - 01/29/18 : 8.15715177615635 %
#01/29/18 - 02/28/18 : -5.398372197988091 %
#02/28/18 - 03/29/18 : -2.881475482640621 %

#We don't need the current rating, so it is simply never read:
#only the entries for 1, 2 and 3 months ago are used below.
#Then, we can calculate the avg rating for each of those months for each stock

def _averageRating(ratingFile, xMonthAgo, topRating=5):
    """Return the reviewer-weighted average rating of one stock for one month.

    Row i of ratingFile is treated as the rating (topRating - i), and the
    entry xMonthAgo of that row as the number of reviewers who gave that
    rating for the month in question.

    Parameters:
        ratingFile: ratings DataFrame of a single stock.
        xMonthAgo: key of the month to aggregate (1, 2 or 3 in this notebook).
        topRating: rating represented by the first row (5 by default).

    Returns:
        sum(rating * reviewers) / sum(reviewers), or NaN when the month has
        no reviewers at all (instead of dividing by zero).
    """
    #Same per-row lookup as ratingFile.iloc[5 - rating][month]
    reviewerCounts = [ratingFile.iloc[row][xMonthAgo] for row in range(len(ratingFile))]
    totalReviewers = sum(reviewerCounts)
    if totalReviewers == 0:
        return float("nan")
    weightedSum = sum((topRating - row) * count for row, count in enumerate(reviewerCounts))
    return weightedSum / float(totalReviewers)

#One average per stock, in the same order as ratingsFiles
avgRating1MonthAgo = [_averageRating(ratingFile, 1) for ratingFile in ratingsFiles]
avgRating2MonthsAgo = [_averageRating(ratingFile, 2) for ratingFile in ratingsFiles]
#And for the third month ago
avgRating3MonthsAgo = [_averageRating(ratingFile, 3) for ratingFile in ratingsFiles]
    

3.25
4.2
4.8
3.4
3.2222222222222223
4.0
4.5
4.125
3.6363636363636362
4.0
3.9
4.444444444444445
4.285714285714286
3.2222222222222223
3.6
4.090909090909091
4.8
4.3
4.285714285714286
4.090909090909091
4.125
3.5
4.625
4.625
4.125
4.555555555555555
3.4444444444444446
2.8333333333333335
3.8
3.5833333333333335
