# Stock Market Sentiment analysis 

In [1]:
import pandas as pd 
import numpy as np 
from textblob import TextBlob
import re 
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

### Load DataSet 

In [2]:
!kaggle datasets download -d aaron7sun/stocknews

stocknews.zip: Skipping, found more recently modified local copy (use --force to force download)


In [3]:
path = r'/home/angelos/jupyter_notebooks/stocknews.zip'

#### We have multiple csv's in the same zip-file so we have to choose the one that interests us <br> which is the 1st one , containing historical news headlines.

In [4]:
from zipfile import ZipFile

In [5]:
with ZipFile(path) as z :
    DJ_news = pd.read_csv(z.open(z.infolist()[0].filename))

### Data Summary 1

In [6]:
DJ_news.info() , DJ_news.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1989 entries, 0 to 1988
Data columns (total 27 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    1989 non-null   object
 1   Label   1989 non-null   int64 
 2   Top1    1989 non-null   object
 3   Top2    1989 non-null   object
 4   Top3    1989 non-null   object
 5   Top4    1989 non-null   object
 6   Top5    1989 non-null   object
 7   Top6    1989 non-null   object
 8   Top7    1989 non-null   object
 9   Top8    1989 non-null   object
 10  Top9    1989 non-null   object
 11  Top10   1989 non-null   object
 12  Top11   1989 non-null   object
 13  Top12   1989 non-null   object
 14  Top13   1989 non-null   object
 15  Top14   1989 non-null   object
 16  Top15   1989 non-null   object
 17  Top16   1989 non-null   object
 18  Top17   1989 non-null   object
 19  Top18   1989 non-null   object
 20  Top19   1989 non-null   object
 21  Top20   1989 non-null   object
 22  Top21   1989 non-null   

(None, (1989, 27))

In [7]:
DJ_news.head()

Unnamed: 0,Date,Label,Top1,Top2,Top3,Top4,Top5,Top6,Top7,Top8,...,Top16,Top17,Top18,Top19,Top20,Top21,Top22,Top23,Top24,Top25
0,2008-08-08,0,"b""Georgia 'downs two Russian warplanes' as cou...",b'BREAKING: Musharraf to be impeached.',b'Russia Today: Columns of troops roll into So...,b'Russian tanks are moving towards the capital...,"b""Afghan children raped with 'impunity,' U.N. ...",b'150 Russian tanks have entered South Ossetia...,"b""Breaking: Georgia invades South Ossetia, Rus...","b""The 'enemy combatent' trials are nothing but...",...,b'Georgia Invades South Ossetia - if Russia ge...,b'Al-Qaeda Faces Islamist Backlash',"b'Condoleezza Rice: ""The US would not act to p...",b'This is a busy day: The European Union has ...,"b""Georgia will withdraw 1,000 soldiers from Ir...",b'Why the Pentagon Thinks Attacking Iran is a ...,b'Caucasus in crisis: Georgia invades South Os...,b'Indian shoe manufactory - And again in a se...,b'Visitors Suffering from Mental Illnesses Ban...,"b""No Help for Mexico's Kidnapping Surge"""
1,2008-08-11,1,b'Why wont America and Nato help us? If they w...,b'Bush puts foot down on Georgian conflict',"b""Jewish Georgian minister: Thanks to Israeli ...",b'Georgian army flees in disarray as Russians ...,"b""Olympic opening ceremony fireworks 'faked'""",b'What were the Mossad with fraudulent New Zea...,b'Russia angered by Israeli military sale to G...,b'An American citizen living in S.Ossetia blam...,...,b'Israel and the US behind the Georgian aggres...,"b'""Do not believe TV, neither Russian nor Geor...",b'Riots are still going on in Montreal (Canada...,b'China to overtake US as largest manufacturer',b'War in South Ossetia [PICS]',b'Israeli Physicians Group Condemns State Tort...,b' Russia has just beaten the United States ov...,b'Perhaps *the* question about the Georgia - R...,b'Russia is so much better at war',"b""So this is what it's come to: trading sex fo..."
2,2008-08-12,0,b'Remember that adorable 9-year-old who sang a...,"b""Russia 'ends Georgia operation'""","b'""If we had no sexual harassment we would hav...","b""Al-Qa'eda is losing support in Iraq because ...",b'Ceasefire in Georgia: Putin Outmaneuvers the...,b'Why Microsoft and Intel tried to kill the XO...,b'Stratfor: The Russo-Georgian War and the Bal...,"b""I'm Trying to Get a Sense of This Whole Geor...",...,b'U.S. troops still in Georgia (did you know t...,b'Why Russias response to Georgia was right',"b'Gorbachev accuses U.S. of making a ""serious ...","b'Russia, Georgia, and NATO: Cold War Two'",b'Remember that adorable 62-year-old who led y...,b'War in Georgia: The Israeli connection',b'All signs point to the US encouraging Georgi...,b'Christopher King argues that the US and NATO...,b'America: The New Mexico?',"b""BBC NEWS | Asia-Pacific | Extinction 'by man..."
3,2008-08-13,0,b' U.S. refuses Israel weapons to attack Iran:...,"b""When the president ordered to attack Tskhinv...",b' Israel clears troops who killed Reuters cam...,b'Britain\'s policy of being tough on drugs is...,b'Body of 14 year old found in trunk; Latest (...,b'China has moved 10 *million* quake survivors...,"b""Bush announces Operation Get All Up In Russi...",b'Russian forces sink Georgian ships ',...,b'Elephants extinct by 2020?',b'US humanitarian missions soon in Georgia - i...,"b""Georgia's DDOS came from US sources""","b'Russian convoy heads into Georgia, violating...",b'Israeli defence minister: US against strike ...,b'Gorbachev: We Had No Choice',b'Witness: Russian forces head towards Tbilisi...,b' Quarter of Russians blame U.S. for conflict...,b'Georgian president says US military will ta...,b'2006: Nobel laureate Aleksander Solzhenitsyn...
4,2008-08-14,1,b'All the experts admit that we should legalis...,b'War in South Osetia - 89 pictures made by a ...,b'Swedish wrestler Ara Abrahamian throws away ...,b'Russia exaggerated the death toll in South O...,b'Missile That Killed 9 Inside Pakistan May Ha...,"b""Rushdie Condemns Random House's Refusal to P...",b'Poland and US agree to missle defense deal. ...,"b'Will the Russians conquer Tblisi? Bet on it,...",...,b'Bank analyst forecast Georgian crisis 2 days...,"b""Georgia confict could set back Russia's US r...",b'War in the Caucasus is as much the product o...,"b'""Non-media"" photos of South Ossetia/Georgia ...",b'Georgian TV reporter shot by Russian sniper ...,b'Saudi Arabia: Mother moves to block child ma...,b'Taliban wages war on humanitarian aid workers',"b'Russia: World ""can forget about"" Georgia\'s...",b'Darfur rebels accuse Sudan of mounting major...,b'Philippines : Peace Advocate say Muslims nee...


### **NOTE**:
> The 'Label' Column represents the **adjusted close price** for a given day .<br>**0** means **DOWN** & **1** means **UP**

In [8]:
DJ_news.isnull().sum()

Date     0
Label    0
Top1     0
Top2     0
Top3     0
Top4     0
Top5     0
Top6     0
Top7     0
Top8     0
Top9     0
Top10    0
Top11    0
Top12    0
Top13    0
Top14    0
Top15    0
Top16    0
Top17    0
Top18    0
Top19    0
Top20    0
Top21    0
Top22    0
Top23    1
Top24    3
Top25    3
dtype: int64

#### The 2nd dataframe refers to 'Dow Jones Industrial Average' (DJIA) data and is used for validation 

In [9]:
with ZipFile(path) as z :
    DJ_average = pd.read_csv(z.open(z.infolist()[2].filename))

### Data Summary 2

In [10]:
DJ_average.info(), DJ_average.shape 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1989 entries, 0 to 1988
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       1989 non-null   object 
 1   Open       1989 non-null   float64
 2   High       1989 non-null   float64
 3   Low        1989 non-null   float64
 4   Close      1989 non-null   float64
 5   Volume     1989 non-null   int64  
 6   Adj Close  1989 non-null   float64
dtypes: float64(5), int64(1), object(1)
memory usage: 108.9+ KB


(None, (1989, 7))

In [11]:
DJ_average.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,2016-07-01,17924.240234,18002.380859,17916.910156,17949.369141,82160000,17949.369141
1,2016-06-30,17712.759766,17930.609375,17711.800781,17929.990234,133030000,17929.990234
2,2016-06-29,17456.019531,17704.509766,17456.019531,17694.679688,106380000,17694.679688
3,2016-06-28,17190.509766,17409.720703,17190.509766,17409.720703,112190000,17409.720703
4,2016-06-27,17355.210938,17355.210938,17063.080078,17140.240234,138740000,17140.240234


In [12]:
DJ_average.isnull().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Volume       0
Adj Close    0
dtype: int64

## Merging the 2 datasets : 

In [13]:
merged_df = DJ_news.merge(DJ_average, how='inner', on='Date', left_index=True)

In [14]:
merged_df.info(), merged_df.shape

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1989 entries, 1988 to 0
Data columns (total 33 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       1989 non-null   object 
 1   Label      1989 non-null   int64  
 2   Top1       1989 non-null   object 
 3   Top2       1989 non-null   object 
 4   Top3       1989 non-null   object 
 5   Top4       1989 non-null   object 
 6   Top5       1989 non-null   object 
 7   Top6       1989 non-null   object 
 8   Top7       1989 non-null   object 
 9   Top8       1989 non-null   object 
 10  Top9       1989 non-null   object 
 11  Top10      1989 non-null   object 
 12  Top11      1989 non-null   object 
 13  Top12      1989 non-null   object 
 14  Top13      1989 non-null   object 
 15  Top14      1989 non-null   object 
 16  Top15      1989 non-null   object 
 17  Top16      1989 non-null   object 
 18  Top17      1989 non-null   object 
 19  Top18      1989 non-null   object 
 20  Top19   

(None, (1989, 33))

In [15]:
merged_df.head()

Unnamed: 0,Date,Label,Top1,Top2,Top3,Top4,Top5,Top6,Top7,Top8,...,Top22,Top23,Top24,Top25,Open,High,Low,Close,Volume,Adj Close
1988,2008-08-08,0,"b""Georgia 'downs two Russian warplanes' as cou...",b'BREAKING: Musharraf to be impeached.',b'Russia Today: Columns of troops roll into So...,b'Russian tanks are moving towards the capital...,"b""Afghan children raped with 'impunity,' U.N. ...",b'150 Russian tanks have entered South Ossetia...,"b""Breaking: Georgia invades South Ossetia, Rus...","b""The 'enemy combatent' trials are nothing but...",...,b'Caucasus in crisis: Georgia invades South Os...,b'Indian shoe manufactory - And again in a se...,b'Visitors Suffering from Mental Illnesses Ban...,"b""No Help for Mexico's Kidnapping Surge""",11432.089844,11759.959961,11388.040039,11734.320312,212830000,11734.320312
1987,2008-08-11,1,b'Why wont America and Nato help us? If they w...,b'Bush puts foot down on Georgian conflict',"b""Jewish Georgian minister: Thanks to Israeli ...",b'Georgian army flees in disarray as Russians ...,"b""Olympic opening ceremony fireworks 'faked'""",b'What were the Mossad with fraudulent New Zea...,b'Russia angered by Israeli military sale to G...,b'An American citizen living in S.Ossetia blam...,...,b' Russia has just beaten the United States ov...,b'Perhaps *the* question about the Georgia - R...,b'Russia is so much better at war',"b""So this is what it's come to: trading sex fo...",11729.669922,11867.110352,11675.530273,11782.349609,183190000,11782.349609
1986,2008-08-12,0,b'Remember that adorable 9-year-old who sang a...,"b""Russia 'ends Georgia operation'""","b'""If we had no sexual harassment we would hav...","b""Al-Qa'eda is losing support in Iraq because ...",b'Ceasefire in Georgia: Putin Outmaneuvers the...,b'Why Microsoft and Intel tried to kill the XO...,b'Stratfor: The Russo-Georgian War and the Bal...,"b""I'm Trying to Get a Sense of This Whole Geor...",...,b'All signs point to the US encouraging Georgi...,b'Christopher King argues that the US and NATO...,b'America: The New Mexico?',"b""BBC NEWS | Asia-Pacific | Extinction 'by man...",11781.700195,11782.349609,11601.519531,11642.469727,173590000,11642.469727
1985,2008-08-13,0,b' U.S. refuses Israel weapons to attack Iran:...,"b""When the president ordered to attack Tskhinv...",b' Israel clears troops who killed Reuters cam...,b'Britain\'s policy of being tough on drugs is...,b'Body of 14 year old found in trunk; Latest (...,b'China has moved 10 *million* quake survivors...,"b""Bush announces Operation Get All Up In Russi...",b'Russian forces sink Georgian ships ',...,b'Witness: Russian forces head towards Tbilisi...,b' Quarter of Russians blame U.S. for conflict...,b'Georgian president says US military will ta...,b'2006: Nobel laureate Aleksander Solzhenitsyn...,11632.80957,11633.780273,11453.339844,11532.959961,182550000,11532.959961
1984,2008-08-14,1,b'All the experts admit that we should legalis...,b'War in South Osetia - 89 pictures made by a ...,b'Swedish wrestler Ara Abrahamian throws away ...,b'Russia exaggerated the death toll in South O...,b'Missile That Killed 9 Inside Pakistan May Ha...,"b""Rushdie Condemns Random House's Refusal to P...",b'Poland and US agree to missle defense deal. ...,"b'Will the Russians conquer Tblisi? Bet on it,...",...,b'Taliban wages war on humanitarian aid workers',"b'Russia: World ""can forget about"" Georgia\'s...",b'Darfur rebels accuse Sudan of mounting major...,b'Philippines : Peace Advocate say Muslims nee...,11532.070312,11718.280273,11450.889648,11615.929688,159790000,11615.929688


In [16]:
merged_df.isnull().sum()

Date         0
Label        0
Top1         0
Top2         0
Top3         0
Top4         0
Top5         0
Top6         0
Top7         0
Top8         0
Top9         0
Top10        0
Top11        0
Top12        0
Top13        0
Top14        0
Top15        0
Top16        0
Top17        0
Top18        0
Top19        0
Top20        0
Top21        0
Top22        0
Top23        1
Top24        3
Top25        3
Open         0
High         0
Low          0
Close        0
Volume       0
Adj Close    0
dtype: int64

### Combining the news headlines 

In [17]:
headlines =[]
for row in range(0, len(merged_df.index)):
    headlines.append(' '.join( str(x) for x in merged_df.iloc[row, 2:27]) )

### The headlines of the **1st row **.

In [18]:
headlines[0]

'b"Georgia \'downs two Russian warplanes\' as countries move to brink of war" b\'BREAKING: Musharraf to be impeached.\' b\'Russia Today: Columns of troops roll into South Ossetia; footage from fighting (YouTube)\' b\'Russian tanks are moving towards the capital of South Ossetia, which has reportedly been completely destroyed by Georgian artillery fire\' b"Afghan children raped with \'impunity,\' U.N. official says - this is sick, a three year old was raped and they do nothing" b\'150 Russian tanks have entered South Ossetia whilst Georgia shoots down two Russian jets.\' b"Breaking: Georgia invades South Ossetia, Russia warned it would intervene on SO\'s side" b"The \'enemy combatent\' trials are nothing but a sham: Salim Haman has been sentenced to 5 1/2 years, but will be kept longer anyway just because they feel like it." b\'Georgian troops retreat from S. Osettain capital, presumably leaving several hundred people killed. [VIDEO]\' b\'Did the U.S. Prep Georgia for War with Russia?\'

## Data Cleaning 

#### As we can see there are some extra special characters that need to be removed. <br> This can be done using Regular Expressions

In [19]:
headlines_clean = [] 

for i in range (0, len(headlines)):
    headlines_clean.append(re.sub("b[(')]", '', headlines[i])) # replacing the character b' ( single quotation) with an empty string 
    headlines_clean[i] = re.sub('b[(")]', '', headlines_clean[i]) # replacing the character b" ( double quotation) with an empty string
    headlines_clean[i] = re.sub("\'", '', headlines_clean[i])# replacing the character \'( single quotation) with an empty string

In [20]:
headlines_clean[0]

'Georgia downs two Russian warplanes as countries move to brink of war" BREAKING: Musharraf to be impeached. Russia Today: Columns of troops roll into South Ossetia; footage from fighting (YouTube) Russian tanks are moving towards the capital of South Ossetia, which has reportedly been completely destroyed by Georgian artillery fire Afghan children raped with impunity, U.N. official says - this is sick, a three year old was raped and they do nothing" 150 Russian tanks have entered South Ossetia whilst Georgia shoots down two Russian jets. Breaking: Georgia invades South Ossetia, Russia warned it would intervene on SOs side" The enemy combatent trials are nothing but a sham: Salim Haman has been sentenced to 5 1/2 years, but will be kept longer anyway just because they feel like it." Georgian troops retreat from S. Osettain capital, presumably leaving several hundred people killed. [VIDEO] Did the U.S. Prep Georgia for War with Russia? Rice Gives Green Light for Israel to Attack Iran: S

## Adding the 'clean' headlines back to the merged dataset 

In [21]:
merged_df['News_Combined'] = headlines_clean

In [22]:
merged_df[['News_Combined']].head(5)

Unnamed: 0,News_Combined
1988,Georgia downs two Russian warplanes as countri...
1987,Why wont America and Nato help us? If they won...
1986,Remember that adorable 9-year-old who sang at ...
1985,U.S. refuses Israel weapons to attack Iran: r...
1984,All the experts admit that we should legalise ...


## Polarity 
 * Polarity is float which lies in the range of [-1,1] where 1 means positive statement and -1 means a negative statement.

In [23]:
def get_polarity(text):
    return TextBlob(text).sentiment.polarity

## Subjectivity

* Subjective sentences generally refer to personal opinion, emotion or judgment. Subjectivity is also a float which lies in the range of [0,1].

In [24]:
def get_subjectivity(text):
    return TextBlob(text).sentiment.subjectivity

Creating new columns for subjectivity and polarity :

In [25]:
merged_df['Subjectivity'] = merged_df['News_Combined'].apply(get_subjectivity)

In [26]:
merged_df['Polarity'] = merged_df['News_Combined'].apply(get_polarity)

In [33]:
merged_df[['Subjectivity', 'Polarity']].head(5)

Unnamed: 0,Subjectivity,Polarity
1988,0.267549,-0.048568
1987,0.374806,0.121956
1986,0.536234,-0.044302
1985,0.364021,0.011398
1984,0.375099,0.040677


Acquiring the sentiment scores using the 'Sentiment Intensity Analyser' provided by the [**vaderSentiment**](https://pypi.org/project/vaderSentiment/) module.<br>
"""VADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis tool that is specifically attuned to sentiments expressed in social media."""

In [28]:
def get_sentiment_int(text):
    analyser = SentimentIntensityAnalyzer()
    sentiment = analyser.polarity_scores(text)
    return sentiment

Scores per day :

In [34]:
compound_score = []
positive = []
negative = []
neutral = []
sentiment_int = 0 # Sentiment Intensity

for i in range(0, len(merged_df['News_Combined'])):
    sentiment_int = get_sentiment_int(merged_df['News_Combined'][i])
    compound_score.append(sentiment_int['compound'])
    positive.append(sentiment_int['pos'])
    negative.append(sentiment_int['neg'])
    neutral.append(sentiment_int['neu'])
    

In [35]:
merged_df['Compound_Score'] = compound_score
merged_df['Positive_Score'] = positive
merged_df['Negative_Score'] = negative
merged_df['Neutral_Score'] = neutral

In [40]:
merged_df.iloc[:,33:].head(5)

Unnamed: 0,News_Combined,Subjectivity,Polarity,Compound_Score,Positive_Score,Negative_Score,Neutral_Score
1988,Georgia downs two Russian warplanes as countri...,0.267549,-0.048568,-0.9983,0.059,0.212,0.729
1987,Why wont America and Nato help us? If they won...,0.374806,0.121956,-0.9977,0.061,0.202,0.738
1986,Remember that adorable 9-year-old who sang at ...,0.536234,-0.044302,-0.9975,0.091,0.225,0.684
1985,U.S. refuses Israel weapons to attack Iran: r...,0.364021,0.011398,-0.9571,0.102,0.132,0.767
1984,All the experts admit that we should legalise ...,0.375099,0.040677,-0.9644,0.094,0.148,0.758


### DATA Preprocessing

Create new DataFrame, containing only the **predictors** and the **target** variable  : 

In [42]:
metrics = ['Open', 'High', 'Low','Close', 'Volume', 'Adj Close', 'Subjectivity', 'Polarity', 'Compound_Score', 'Positive_Score', 'Negative_Score', 'Neutral_Score', 'Label']
df = merged_df[metrics]
df.head(3)

Unnamed: 0,Open,High,Low,Close,Volume,Adj Close,Subjectivity,Polarity,Compound_Score,Positive_Score,Negative_Score,Neutral_Score,Label
1988,11432.089844,11759.959961,11388.040039,11734.320312,212830000,11734.320312,0.267549,-0.048568,-0.9983,0.059,0.212,0.729,0
1987,11729.669922,11867.110352,11675.530273,11782.349609,183190000,11782.349609,0.374806,0.121956,-0.9977,0.061,0.202,0.738,1
1986,11781.700195,11782.349609,11601.519531,11642.469727,173590000,11642.469727,0.536234,-0.044302,-0.9975,0.091,0.225,0.684,0


**Predictors** (Independent Variables):

In [44]:
X = np.array(df.drop(['Label'], 1))

**Target** (Dependent Variable):

In [45]:
y = np.array(df['Label'])

### Splitting The Data

> 80% training ---- 20% testing

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

### Building and Training the model
* Using Linear discriminant analysis
>[Linear discriminant analysis (LDA)](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) is a method used in statistics, pattern recognition, and machine learning to find a linear combination of features that characterizes or separates two or more classes of objects or events. The resulting combination may be used as a linear classifier, or, more commonly, for dimensionality reduction before later classification. 

In [47]:
ln_disc_an = LinearDiscriminantAnalysis()
ln_disc_an.fit(X_train, y_train)

LinearDiscriminantAnalysis()

In [48]:
pred = ln_disc_an.predict(X_test)
pred

array([1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1,
       0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,

In [49]:
ln_disc_an.coef_ # Weight vector for the features.

array([[-2.33061754e-02,  3.36498169e-03,  5.73529161e-03,
         7.10572567e-03, -9.90869231e-11,  7.10572567e-03,
        -3.30769350e-01, -6.05414393e-01,  3.77089056e-01,
        -1.10802819e+01, -1.42348331e+01, -1.27801075e+01]])

In [50]:
ln_disc_an.classes_ # Unique class labels.

array([0, 1])

### Model Accuracy 

In [52]:
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.99      0.88      0.93       183
           1       0.91      1.00      0.95       215

    accuracy                           0.94       398
   macro avg       0.95      0.94      0.94       398
weighted avg       0.95      0.94      0.94       398



#### We get a **94%** model **accuracy**.