In [None]:
#import libraries
import pandas as pd
import numpy as np
from textblob import TextBlob
import re
import nltk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


In [None]:
#read the 2020 S&P 100 earnings-call transcript list from csv
transcribed_list = pd.read_csv(filepath_or_buffer='transcribed_transcript_list2.csv')
#preview the first ten rows
transcribed_list.iloc[:10]

In [None]:
#store the data
#work on an independent copy: the cells below add columns to transcribed_list_org,
#and a plain assignment would add them to the loaded transcribed_list frame as well
transcribed_list_org = transcribed_list.copy()

In [4]:
#inspect the column dtypes of the transcript frame ('date' is still a plain object column here)
transcribed_list_org.dtypes

Unnamed: 0                  int64
date                       object
ticker                     object
title                      object
transcript                 object
transcript_prep_remarks    object
url                        object
dtype: object

In [5]:
#parse the earnings-call date, then re-format it as a zero-padded MM/DD/YYYY string
#so it can be used as a join key against the price data
transcribed_list_org['date_rev2'] = pd.to_datetime(arg=transcribed_list_org['date'])
transcribed_list_org['call_date'] = transcribed_list_org.date_rev2.dt.strftime('%m/%d/%Y')

In [7]:
#preview the first five rows, including the two new date columns
transcribed_list_org.iloc[:5]

Unnamed: 0.1,Unnamed: 0,date,ticker,title,transcript,transcript_prep_remarks,url,date_rev2,call_date
0,0,10/29/2020,AAPL,Apple Inc. (AAPL) CEO Tim Cook on Q4 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q4 2020 Earnings Conf...,"['Operator', 'Good day everyone and welcome to...",/article/4382943-apple-inc-aapl-ceo-tim-cook-o...,2020-10-29,10/29/2020
1,1,7/30/2020,AAPL,Apple Inc. (AAPL) CEO Tim Cook on Q3 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q3 2020 Results Confe...,"['Operator', ""Good day, everyone. Welcome to t...",/article/4362707-apple-inc-aapl-ceo-tim-cook-o...,2020-07-30,07/30/2020
2,2,4/30/2020,AAPL,Apple Inc. (AAPL) CEO Tim Cook on Q2 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q2 2020 Results Confe...,"['Operator', 'Good day everyone. Welcome to th...",/article/4341792-apple-inc-aapl-ceo-tim-cook-o...,2020-04-30,04/30/2020
3,3,1/28/2020,AAPL,Apple Inc. (AAPL) CEO Tim Cook on Q1 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q1 2020 Results Confe...,"['Operator', ""Good day, everyone. Welcome to t...",/article/4319666-apple-inc-aapl-ceo-tim-cook-o...,2020-01-28,01/28/2020
4,8,10/30/2020,ABBV,AbbVie Inc.'s (ABBV) CEO Rick Gonzalez on Q3 2...,AbbVie Inc. (NYSE:ABBV) Q3 2020 Results Earnin...,"['Operator', 'Good morning and thank you for s...",/article/4383381-abbvie-inc-s-abbv-ceo-rick-go...,2020-10-30,10/30/2020


In [8]:
#rename 'ticker' to 'Symbol' so the transcript frame shares its key column name with the price data
transcribed_list_org = transcribed_list_org.rename({'ticker': 'Symbol'}, axis='columns')
transcribed_list_org.iloc[:3]

Unnamed: 0.1,Unnamed: 0,date,Symbol,title,transcript,transcript_prep_remarks,url,date_rev2,call_date
0,0,10/29/2020,AAPL,Apple Inc. (AAPL) CEO Tim Cook on Q4 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q4 2020 Earnings Conf...,"['Operator', 'Good day everyone and welcome to...",/article/4382943-apple-inc-aapl-ceo-tim-cook-o...,2020-10-29,10/29/2020
1,1,7/30/2020,AAPL,Apple Inc. (AAPL) CEO Tim Cook on Q3 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q3 2020 Results Confe...,"['Operator', ""Good day, everyone. Welcome to t...",/article/4362707-apple-inc-aapl-ceo-tim-cook-o...,2020-07-30,07/30/2020
2,2,4/30/2020,AAPL,Apple Inc. (AAPL) CEO Tim Cook on Q2 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q2 2020 Results Confe...,"['Operator', 'Good day everyone. Welcome to th...",/article/4341792-apple-inc-aapl-ceo-tim-cook-o...,2020-04-30,04/30/2020


In [9]:
#TextBlob subjectivity helper
def getSubjectivity(text):
    """Return the TextBlob subjectivity score of ``text``."""
    blob = TextBlob(text)
    return blob.sentiment.subjectivity

In [10]:
#TextBlob polarity helper
def getPolarity(text):
    """Return the TextBlob polarity score of ``text``."""
    blob = TextBlob(text)
    return blob.sentiment.polarity


In [11]:
#score every full transcript with TextBlob and keep both measures as new columns
#on the transcript frame (the price data has not been merged in yet)
transcribed_list_org['Subjectivity'] = transcribed_list_org['transcript'].map(getSubjectivity)
transcribed_list_org['Polarity'] = transcribed_list_org['transcript'].map(getPolarity)

In [12]:
#spot-check the subjectivity score of the row labelled 0
transcribed_list_org.loc[0, 'Subjectivity']

0.4755678464000953

In [13]:
#create function to get sentiment score using Sentiment Intensity Analyzer

#one analyzer shared by every getSIA call; created lazily on first use
_sia_analyzer = None

def getSIA(text):
    """Return the VADER polarity scores for ``text``.

    ``text`` is passed to ``SentimentIntensityAnalyzer.polarity_scores`` and the
    result is returned unchanged; the loop that calls this function reads its
    'compound', 'neg', 'neu' and 'pos' entries.

    The analyzer is built once and reused, because constructing a new one for
    every transcript repeats its setup work without changing the scores.
    """
    global _sia_analyzer
    if _sia_analyzer is None:
        _sia_analyzer = SentimentIntensityAnalyzer()
    return _sia_analyzer.polarity_scores(text)

In [None]:
#per-transcript VADER scores, collected in the row order of the 'transcript' column
compound = []
neg = []
pos = []
neu = []
SIA = 0  #remains 0 only if there are no transcripts; otherwise ends up as the last score dict

#iterate over the column values themselves so the loop does not rely on the
#frame having a default 0..n-1 index (label lookup with [i] would fail otherwise)
for transcript_text in transcribed_list_org['transcript']:
    SIA = getSIA(transcript_text)
    compound.append(SIA['compound'])
    neg.append(SIA['neg'])
    neu.append(SIA['neu'])
    pos.append(SIA['pos'])

In [None]:
#a cell only displays its last expression, so bundle the first three scores of each
#list and the last score dict into a single tuple to see all of them
pos[:3], compound[:3], neg[:3], neu[:3], SIA

In [None]:
#store the VADER scores as new columns on the transcript frame
#(the lists were filled in the row order of the 'transcript' column)
transcribed_list_org['Compound']=compound
transcribed_list_org['Negative']=neg
transcribed_list_org['Positive']=pos
transcribed_list_org['Neutral']=neu

In [None]:
#preview the first three rows with the new score columns
transcribed_list_org.iloc[:3]

In [None]:
#list the column names now present on the transcript frame
transcribed_list_org.columns

In [None]:
#summarise the sentiment columns to pick the label cut-off: transcripts with Positive
#above 0.10 are labelled 1, the rest 0 (the average is 0.143419)
transcribed_list_org.loc[:, ['Subjectivity', 'Polarity', 'Negative', 'Positive', 'Neutral']].describe()

In [None]:
#binary label column: 1 where Positive is above 0.10, otherwise 0
transcribed_list_org['Label'] = np.where(transcribed_list_org['Positive'].gt(0.100), 1, 0)

In [None]:
#preview the first ten rows with the new Label column
transcribed_list_org.iloc[:10]

In [18]:
#read the 2020 S&P 100 daily price data from csv
price_list = pd.read_csv(filepath_or_buffer='combined_snp100_data.csv')
#preview the first ten rows
price_list.iloc[:10]

Unnamed: 0.1,Unnamed: 0,Symbol,Name,Date,Adj_Close,Close,High,Low,Open,Volume
0,0,AAPL,Apple Inc.,2020-01-02,74.444603,75.087502,75.150002,73.797501,74.059998,135480400
1,1,AAPL,Apple Inc.,2020-01-03,73.72084,74.357498,75.144997,74.125,74.287498,146322800
2,2,AAPL,Apple Inc.,2020-01-06,74.308266,74.949997,74.989998,73.1875,73.447502,118387200
3,3,AAPL,Apple Inc.,2020-01-07,73.958794,74.597504,75.224998,74.370003,74.959999,108872000
4,4,AAPL,Apple Inc.,2020-01-08,75.148521,75.797501,76.110001,74.290001,74.290001,132079200
5,5,AAPL,Apple Inc.,2020-01-09,76.744728,77.407501,77.607498,76.550003,76.809998,170108400
6,6,AAPL,Apple Inc.,2020-01-10,76.918221,77.582497,78.167503,77.0625,77.650002,140644800
7,7,AAPL,Apple Inc.,2020-01-13,78.561531,79.239998,79.267502,77.787498,77.910004,121532000
8,8,AAPL,Apple Inc.,2020-01-14,77.500702,78.169998,79.392502,78.042503,79.175003,161954400
9,9,AAPL,Apple Inc.,2020-01-15,77.168564,77.834999,78.875,77.387497,77.962502,121923600


In [19]:
#inspect the column dtypes of the price frame ('Date' is still a plain object column here)
price_list.dtypes

Unnamed: 0      int64
Symbol         object
Name           object
Date           object
Adj_Close     float64
Close         float64
High          float64
Low           float64
Open          float64
Volume         object
dtype: object

In [20]:
#parse the trading date, then re-format it as a zero-padded MM/DD/YYYY string
#so it matches the call_date key on the transcript frame in the join
price_list['date_rev2'] = pd.to_datetime(arg=price_list['Date'])
price_list['stock_price_date'] = price_list.date_rev2.dt.strftime('%m/%d/%Y')

In [21]:
#preview the first five rows, including the two new date columns
price_list.iloc[:5]

Unnamed: 0.1,Unnamed: 0,Symbol,Name,Date,Adj_Close,Close,High,Low,Open,Volume,date_rev2,stock_price_date
0,0,AAPL,Apple Inc.,2020-01-02,74.444603,75.087502,75.150002,73.797501,74.059998,135480400,2020-01-02,01/02/2020
1,1,AAPL,Apple Inc.,2020-01-03,73.72084,74.357498,75.144997,74.125,74.287498,146322800,2020-01-03,01/03/2020
2,2,AAPL,Apple Inc.,2020-01-06,74.308266,74.949997,74.989998,73.1875,73.447502,118387200,2020-01-06,01/06/2020
3,3,AAPL,Apple Inc.,2020-01-07,73.958794,74.597504,75.224998,74.370003,74.959999,108872000,2020-01-07,01/07/2020
4,4,AAPL,Apple Inc.,2020-01-08,75.148521,75.797501,76.110001,74.290001,74.290001,132079200,2020-01-08,01/08/2020


In [22]:
#outer-join prices with transcripts on (Symbol, date string): every price row is kept,
#and transcript columns are filled only on rows whose date matches a call date
merge_price_trans3 = price_list.merge(
    transcribed_list_org,
    how='outer',
    left_on=['Symbol', 'stock_price_date'],
    right_on=['Symbol', 'call_date'],
)
merge_price_trans3.iloc[:29]



Unnamed: 0,Unnamed: 0_x,Symbol,Name,Date,Adj_Close,Close,High,Low,Open,Volume,...,transcript_prep_remarks,url,date_rev2_y,call_date,Subjectivity,Polarity,Compound,Negative,Positive,Neutral
0,0,AAPL,Apple Inc.,2020-01-02,74.444603,75.087502,75.150002,73.797501,74.059998,135480400,...,,,NaT,,,,,,,
1,1,AAPL,Apple Inc.,2020-01-03,73.72084,74.357498,75.144997,74.125,74.287498,146322800,...,,,NaT,,,,,,,
2,2,AAPL,Apple Inc.,2020-01-06,74.308266,74.949997,74.989998,73.1875,73.447502,118387200,...,,,NaT,,,,,,,
3,3,AAPL,Apple Inc.,2020-01-07,73.958794,74.597504,75.224998,74.370003,74.959999,108872000,...,,,NaT,,,,,,,
4,4,AAPL,Apple Inc.,2020-01-08,75.148521,75.797501,76.110001,74.290001,74.290001,132079200,...,,,NaT,,,,,,,
5,5,AAPL,Apple Inc.,2020-01-09,76.744728,77.407501,77.607498,76.550003,76.809998,170108400,...,,,NaT,,,,,,,
6,6,AAPL,Apple Inc.,2020-01-10,76.918221,77.582497,78.167503,77.0625,77.650002,140644800,...,,,NaT,,,,,,,
7,7,AAPL,Apple Inc.,2020-01-13,78.561531,79.239998,79.267502,77.787498,77.910004,121532000,...,,,NaT,,,,,,,
8,8,AAPL,Apple Inc.,2020-01-14,77.500702,78.169998,79.392502,78.042503,79.175003,161954400,...,,,NaT,,,,,,,
9,9,AAPL,Apple Inc.,2020-01-15,77.168564,77.834999,78.875,77.387497,77.962502,121923600,...,,,NaT,,,,,,,


In [23]:
#replace every missing value from the outer join with an empty string
merge_price_trans4 = merge_price_trans3.fillna(value='')
merge_price_trans4.iloc[:29]


Unnamed: 0,Unnamed: 0_x,Symbol,Name,Date,Adj_Close,Close,High,Low,Open,Volume,...,transcript_prep_remarks,url,date_rev2_y,call_date,Subjectivity,Polarity,Compound,Negative,Positive,Neutral
0,0,AAPL,Apple Inc.,2020-01-02,74.444603,75.087502,75.150002,73.797501,74.059998,135480400,...,,,,,,,,,,
1,1,AAPL,Apple Inc.,2020-01-03,73.72084,74.357498,75.144997,74.125,74.287498,146322800,...,,,,,,,,,,
2,2,AAPL,Apple Inc.,2020-01-06,74.308266,74.949997,74.989998,73.1875,73.447502,118387200,...,,,,,,,,,,
3,3,AAPL,Apple Inc.,2020-01-07,73.958794,74.597504,75.224998,74.370003,74.959999,108872000,...,,,,,,,,,,
4,4,AAPL,Apple Inc.,2020-01-08,75.148521,75.797501,76.110001,74.290001,74.290001,132079200,...,,,,,,,,,,
5,5,AAPL,Apple Inc.,2020-01-09,76.744728,77.407501,77.607498,76.550003,76.809998,170108400,...,,,,,,,,,,
6,6,AAPL,Apple Inc.,2020-01-10,76.918221,77.582497,78.167503,77.0625,77.650002,140644800,...,,,,,,,,,,
7,7,AAPL,Apple Inc.,2020-01-13,78.561531,79.239998,79.267502,77.787498,77.910004,121532000,...,,,,,,,,,,
8,8,AAPL,Apple Inc.,2020-01-14,77.500702,78.169998,79.392502,78.042503,79.175003,161954400,...,,,,,,,,,,
9,9,AAPL,Apple Inc.,2020-01-15,77.168564,77.834999,78.875,77.387497,77.962502,121923600,...,,,,,,,,,,


In [40]:
#list the merged column names (the _x / _y suffixes mark columns present in both inputs)
merge_price_trans4.columns

Index(['Unnamed: 0_x', 'Symbol', 'Name', 'Date', 'Adj_Close', 'Close', 'High',
       'Low', 'Open', 'Volume', 'date_rev2_x', 'stock_price_date',
       'Unnamed: 0_y', 'date', 'title', 'transcript',
       'transcript_prep_remarks', 'url', 'date_rev2_y', 'call_date',
       'Subjectivity', 'Polarity', 'Compound', 'Negative', 'Positive',
       'Neutral'],
      dtype='object')

In [38]:
#drop the leftover index columns and the raw / parsed date columns; the formatted
#stock_price_date and call_date strings are kept.
#`columns=` is used because passing the axis positionally (`.drop([...], 1)`) is
#rejected by pandas >= 2.0
merge_price_trans5 = merge_price_trans4.drop(
    columns=['Unnamed: 0_x', 'Date', 'date_rev2_x', 'date', 'Unnamed: 0_y', 'date_rev2_y']
)
merge_price_trans5.head(3)

Unnamed: 0,Symbol,Name,Adj_Close,Close,High,Low,Open,Volume,stock_price_date,title,transcript,transcript_prep_remarks,url,call_date,Subjectivity,Polarity,Compound,Negative,Positive,Neutral
0,AAPL,Apple Inc.,74.444603,75.087502,75.150002,73.797501,74.059998,135480400,01/02/2020,,,,,,,,,,,
1,AAPL,Apple Inc.,73.72084,74.357498,75.144997,74.125,74.287498,146322800,01/03/2020,,,,,,,,,,,
2,AAPL,Apple Inc.,74.308266,74.949997,74.989998,73.1875,73.447502,118387200,01/06/2020,,,,,,,,,,,


In [39]:
#fix the column order: identifiers, prices, call metadata, then sentiment scores and the label
merge_price_trans6 = merge_price_trans5.loc[:, [
    'Symbol', 'Name', 'stock_price_date',
    'Adj_Close', 'Close', 'High', 'Low', 'Open', 'Volume',
    'call_date', 'title', 'transcript', 'transcript_prep_remarks', 'url',
    'Subjectivity', 'Polarity', 'Compound', 'Negative', 'Positive', 'Neutral',
    'Label',
]]
merge_price_trans6.iloc[:3]


Unnamed: 0,Symbol,Name,stock_price_date,Adj_Close,Close,High,Low,Open,Volume,call_date,title,transcript,transcript_prep_remarks,url,Subjectivity,Polarity,Compound,Negative,Positive,Neutral
0,AAPL,Apple Inc.,01/02/2020,74.444603,75.087502,75.150002,73.797501,74.059998,135480400,,,,,,,,,,,
1,AAPL,Apple Inc.,01/03/2020,73.72084,74.357498,75.144997,74.125,74.287498,146322800,,,,,,,,,,,
2,AAPL,Apple Inc.,01/06/2020,74.308266,74.949997,74.989998,73.1875,73.447502,118387200,,,,,,,,,,,


In [41]:
#save the merged prices + sentiment scores; the row index is written too, which is why
#an extra unnamed column shows up when the file is read back
merge_price_trans6.to_csv(path_or_buf='full_trans_sents_score_wstockprice.csv')

In [42]:
#read the saved file back to check that it round-trips
sent_test2 = pd.read_csv(filepath_or_buffer='full_trans_sents_score_wstockprice.csv')
sent_test2.iloc[:10]

Unnamed: 0.1,Unnamed: 0,Symbol,Name,stock_price_date,Adj_Close,Close,High,Low,Open,Volume,...,title,transcript,transcript_prep_remarks,url,Subjectivity,Polarity,Compound,Negative,Positive,Neutral
0,0,AAPL,Apple Inc.,01/02/2020,74.444603,75.087502,75.150002,73.797501,74.059998,135480400,...,,,,,,,,,,
1,1,AAPL,Apple Inc.,01/03/2020,73.72084,74.357498,75.144997,74.125,74.287498,146322800,...,,,,,,,,,,
2,2,AAPL,Apple Inc.,01/06/2020,74.308266,74.949997,74.989998,73.1875,73.447502,118387200,...,,,,,,,,,,
3,3,AAPL,Apple Inc.,01/07/2020,73.958794,74.597504,75.224998,74.370003,74.959999,108872000,...,,,,,,,,,,
4,4,AAPL,Apple Inc.,01/08/2020,75.148521,75.797501,76.110001,74.290001,74.290001,132079200,...,,,,,,,,,,
5,5,AAPL,Apple Inc.,01/09/2020,76.744728,77.407501,77.607498,76.550003,76.809998,170108400,...,,,,,,,,,,
6,6,AAPL,Apple Inc.,01/10/2020,76.918221,77.582497,78.167503,77.0625,77.650002,140644800,...,,,,,,,,,,
7,7,AAPL,Apple Inc.,01/13/2020,78.561531,79.239998,79.267502,77.787498,77.910004,121532000,...,,,,,,,,,,
8,8,AAPL,Apple Inc.,01/14/2020,77.500702,78.169998,79.392502,78.042503,79.175003,161954400,...,,,,,,,,,,
9,9,AAPL,Apple Inc.,01/15/2020,77.168564,77.834999,78.875,77.387497,77.962502,121923600,...,,,,,,,,,,


In [43]:
#preview the in-memory merged frame again for comparison with the file read back
merge_price_trans6.iloc[:3]

Unnamed: 0,Symbol,Name,stock_price_date,Adj_Close,Close,High,Low,Open,Volume,call_date,title,transcript,transcript_prep_remarks,url,Subjectivity,Polarity,Compound,Negative,Positive,Neutral
0,AAPL,Apple Inc.,01/02/2020,74.444603,75.087502,75.150002,73.797501,74.059998,135480400,,,,,,,,,,,
1,AAPL,Apple Inc.,01/03/2020,73.72084,74.357498,75.144997,74.125,74.287498,146322800,,,,,,,,,,,
2,AAPL,Apple Inc.,01/06/2020,74.308266,74.949997,74.989998,73.1875,73.447502,118387200,,,,,,,,,,,


In [44]:
#keep only the keys, prices, sentiment scores and label for modelling.
#`columns=` is used because passing the axis positionally (`.drop([...], 1)`) is
#rejected by pandas >= 2.0
transcribed_keep2 = merge_price_trans6.drop(
    columns=['Name', 'title', 'transcript', 'transcript_prep_remarks', 'url', 'Volume', 'Compound']
)
transcribed_keep2.head(3)

Unnamed: 0,Symbol,stock_price_date,Adj_Close,Close,High,Low,Open,call_date,Subjectivity,Polarity,Negative,Positive,Neutral
0,AAPL,01/02/2020,74.444603,75.087502,75.150002,73.797501,74.059998,,,,,,
1,AAPL,01/03/2020,73.72084,74.357498,75.144997,74.125,74.287498,,,,,,
2,AAPL,01/06/2020,74.308266,74.949997,74.989998,73.1875,73.447502,,,,,,


In [67]:
#inspect dtypes: the sentiment columns are object here because missing values were
#filled with '' earlier
transcribed_keep2.dtypes

Symbol               object
stock_price_date     object
Adj_Close           float64
Close               float64
High                float64
Low                 float64
Open                float64
call_date            object
Subjectivity         object
Polarity             object
Negative             object
Positive             object
Neutral              object
dtype: object

In [37]:
#display the modelling frame
#NOTE(review): the saved output below comes from an earlier execution (In [37]) and shows
#a different layout from the frame built above; re-run the notebook top to bottom to refresh it
transcribed_keep2

Unnamed: 0_level_0,Unnamed: 1_level_0,Adj_Close,Close,High,Low,Open,Subjectivity,Polarity,Compound,Negative,Positive,Neutral,Label
Date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
10/29/2020,AAPL,115.121384,115.320000,116.930000,112.199997,112.370003,0.492110,0.168114,1.0000,0.036,0.167,0.797,1
07/30/2020,AAPL,95.851517,96.190002,96.297501,93.767502,94.187500,0.482856,0.164288,1.0000,0.028,0.171,0.801,1
04/30/2020,AAPL,72.993935,73.449997,73.632500,72.087502,72.489998,0.448222,0.193248,1.0000,0.030,0.157,0.813,1
01/28/2020,AAPL,78.742477,79.422501,79.599998,78.047501,78.150002,0.464910,0.192856,0.9999,0.024,0.165,0.811,1
10/30/2020,ABBV,84.116188,85.099998,86.139999,83.000000,83.199997,0.436565,0.143596,0.9999,0.017,0.133,0.850,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
05/19/2020,WMT,123.985207,124.949997,131.990005,124.809998,131.750000,0.000000,0.000000,0.0000,0.000,0.000,1.000,0
10/30/2020,XOM,31.851534,32.619999,33.139999,32.009998,32.410000,0.382868,0.092255,0.9998,0.044,0.114,0.842,1
07/31/2020,XOM,40.293766,42.080002,42.169998,40.910000,41.160000,0.395989,0.093709,0.9998,0.041,0.099,0.860,0
05/01/2020,XOM,40.523052,43.139999,46.389999,43.000000,45.630001,0.408350,0.133240,0.9999,0.045,0.133,0.823,1


In [None]:
#turn empty / whitespace-only cells back into NaN so they can be dropped
transcribed_keep3 = transcribed_keep2.replace(to_replace=r'^\s*$', value=np.nan, regex=True)
transcribed_keep3

In [None]:
#keep only rows with no missing value in any column
transcribed_keep4 = transcribed_keep3.dropna(axis=0, how='any')
transcribed_keep4

In [None]:
#list the columns available for building the feature and target arrays
transcribed_keep4.columns

In [38]:
#create featured data set
X= transcribed_keep4
X=np.array(X.drop(['Label', 'Symbol', 'stock_price_date','call_date'], 1))

#create target data set
y=np.array(transcribed_keep2['Label'])

In [39]:
#show the feature array
X

array([[1.15121384e+02, 1.15320000e+02, 1.16930000e+02, ...,
        3.60000000e-02, 1.67000000e-01, 7.97000000e-01],
       [9.58515167e+01, 9.61900024e+01, 9.62975006e+01, ...,
        2.80000000e-02, 1.71000000e-01, 8.01000000e-01],
       [7.29939346e+01, 7.34499969e+01, 7.36324997e+01, ...,
        3.00000000e-02, 1.57000000e-01, 8.13000000e-01],
       ...,
       [4.02937660e+01, 4.20800018e+01, 4.21699982e+01, ...,
        4.10000000e-02, 9.90000000e-02, 8.60000000e-01],
       [4.05230522e+01, 4.31399994e+01, 4.63899994e+01, ...,
        4.50000000e-02, 1.33000000e-01, 8.23000000e-01],
       [5.75258255e+01, 6.21199989e+01, 6.32000008e+01, ...,
        2.90000000e-02, 1.54000000e-01, 8.17000000e-01]])

In [40]:
#show the target array
y

array([1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1,

In [41]:
#hold out 20% of the rows for testing and train on the remaining 80%;
#random_state is fixed so the split is the same on every run
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [42]:
#build a linear discriminant analysis classifier and fit it on the training split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
model = LinearDiscriminantAnalysis()
model = model.fit(x_train, y_train)

In [43]:
#classify the held-out rows with the fitted model
predictions = model.predict(X=x_test)
predictions

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [1]:
#report the model's metrics for the 1 / 0 label (1 = Positive over 0.10, 0 = under 0.10)

#The classification report summarises model quality with three metrics: precision, recall and F1 score.

#Precision = correctly predicted positives / all PREDICTED positives.
#Of the rows the model predicted to be positive, what share actually were positive?

#Recall = correctly predicted positives / all ACTUAL positives.
#Of the rows that actually were positive, what share did the model predict to be positive?

#F1 score = harmonic mean of precision and recall, usable as a single measure of classification performance.
#An ideal F1 score is close to 1.0

from sklearn.metrics import classification_report
print(classification_report(y_true=y_test, y_pred=predictions))

NameError: name 'y_test' is not defined