In [61]:
import json
import pprint

import datetime
import requests
import gzip
import subprocess
import re
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import yfinance as yf
from finta import TA

from sklearn import svm
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report, mean_squared_error, accuracy_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

import nltk
from nltk.parse.corenlp import CoreNLPServer, CoreNLPParser, CoreNLPDependencyParser
from nltk.tokenize import sent_tokenize
from collections import Counter

import stanfordnlp

In [60]:
nltk.download('averaged_perceptron_tagger')

java_path = "C:/Program Files (x86)/Java/jdk1.8.0_321/bin/java.exe"
os.environ['JAVAHOME'] = java_path
nltk.internals.config_java(java_path)

CORENLP_JAR = os.path.join("models", "stanford-corenlp-4.5.4", "stanford-corenlp-4.5.4.jar")

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\ioana\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [52]:
STANFORD = os.path.join("models", "stanford-corenlp-4.5.4")

# Create the server
server = CoreNLPServer(
    os.path.join(STANFORD, "stanford-corenlp-4.5.4.jar"),
    os.path.join(STANFORD, "stanford-corenlp-4.5.4-models.jar"),    
)

# Start the server in the background
server.start()

KeyboardInterrupt: 

In [21]:
corpus_path = "data\\formatted_corpus.json"
with open(corpus_path, 'r', encoding='utf-8') as corpus_json:
    corpus = json.load(corpus_json)
print("Corpus size: ", len(corpus))

Corpus size:  1224


Visualize data

In [46]:
FKRE_ratings = {}
FKRE_scores = []
WPMs = []
NAWLs = []
NGSLs = []
seconds = []
view_counts = []
like_counts = []
likes_per_view = []
for talk in corpus:
    if talk['FKRE_rating'] in FKRE_ratings:
        FKRE_ratings[talk['FKRE_rating']] += 1
    else:
        FKRE_ratings[talk['FKRE_rating']] = 1
    FKRE_scores.append(talk['FKRE_score'])
    WPMs.append(talk['WPM'])
    NAWLs.append(talk['NAWL'])
    NGSLs.append(talk['NGSL'])
    seconds.append(talk['seconds'])
    view_counts.append(talk['view_count'])
    like_counts.append(talk['like_count'])
    likes_per_view.append(talk['likes_per_view'])

print(f'FKRE ratings: {FKRE_ratings}')
print(f'FKRE scores: min {min(FKRE_scores)}, max {max(FKRE_scores)}, average {sum(FKRE_scores) / len(FKRE_scores)}')
print(f'Words per minute: min {min(WPMs)}, max {max(WPMs)}, average {sum(WPMs) / len(WPMs)}')
print(f'Words in the New Academic Word List: min {min(NAWLs)}, max {max(NAWLs)}, average {sum(NAWLs) / len(NAWLs)}')
print(f'Words in the New General Service List: min {min(NGSLs)}, max {max(NGSLs)}, average {sum(NGSLs) / len(NGSLs)}')
print(f'Length in seconds: min {min(seconds)}, max {max(seconds)}, average {sum(seconds) / len(seconds)}')
print(f'Number of views: min {min(view_counts)}, max {max(view_counts)}, average {sum(view_counts) / len(view_counts)}')
print(f'Number of likes: min {min(like_counts)}, max {max(like_counts)}, average {sum(like_counts) / len(like_counts)}')
print(f'Number of likes per view: min {min(likes_per_view)}, max {max(likes_per_view)}, average {sum(likes_per_view) / len(likes_per_view)}')

plt.figure(figsize=(50, 20))

plt.bar(range(len(FKRE_scores)), FKRE_scores)
plt.title('FKRE scores')
plt.savefig(f'data/stats/FKRE_scores.jpg')
plt.clf()

plt.bar(range(len(WPMs)), WPMs)
plt.title('Words per minute')
plt.savefig(f'data/stats/WPMs.jpg')
plt.clf()

plt.bar(range(len(NAWLs)), NAWLs)
plt.title('Words in the New Academic Word List')
plt.savefig(f'data/stats/NAWLs.jpg')
plt.clf()

plt.bar(range(len(NGSLs)), NGSLs)
plt.title('Words in the New General Service List')
plt.savefig(f'data/stats/NGSLs.jpg')
plt.clf()

plt.bar(range(len(seconds)), seconds)
plt.title('Length in seconds')
plt.savefig(f'data/stats/seconds.jpg')
plt.clf()

plt.bar(range(len(view_counts)), view_counts)
plt.title('Number of views')
plt.savefig(f'data/stats/view_counts.jpg')
plt.clf()

plt.bar(range(len(like_counts)), like_counts)
plt.title('Number of likes')
plt.savefig(f'data/stats/like_counts.jpg')
plt.clf()

plt.bar(range(len(likes_per_view)), likes_per_view)
plt.title('Number of likes per view')
plt.savefig(f'data/stats/likes_per_view.jpg')
plt.clf()

FKRE ratings: {'Fairly easy': 566, 'Plain English': 368, 'Easy': 178, 'Fairly difficult': 94, 'Very easy': 8, 'Difficult': 10}
FKRE scores: min 41.7, max 94.9, average 71.62647058823535
Words per minute: min 71.0, max 275.0, average 160.2393790849673
Words in the New Academic Word List: min 1, max 61, average 19.40767973856209
Words in the New General Service List: min 224, max 645, average 415.577614379085
Length in seconds: min 481, max 1199, average 884.9648692810457
Number of views: min 231850, max 75063761, average 2817607.3333333335
Number of likes: min 10000, max 22000000, average 175500.0
Number of likes per view: min 0.027548623320160084, max 0.2995794216171842, average 0.03349684151685497


<Figure size 5000x2000 with 0 Axes>

Normalize metrics

In [49]:
weights = np.array([4, 1, 1, 1, 1, 5, 3, 2, 1])
normalized_array = np.divide(weights, np.sum(weights))
print(sum(normalized_array))

1.0


In [58]:
data = pd.DataFrame(corpus)

# Extract laughter, applause, and cheering counts from the "total_responses" dictionary
data['laughter'] = data['total_responses'].apply(lambda x: x['laughter'])
data['applause'] = data['total_responses'].apply(lambda x: x['applause'])
data['cheering'] = data['total_responses'].apply(lambda x: x['cheering'])

# Normalize the metrics
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(data[['FKRE_score', 'WPM', 'NAWL', 'NGSL', 'seconds', 'likes_per_view', 'laughter', 'applause', 'cheering']])
weights = np.array([4, 1, 1, 1, 1, 5, 3, 2, 1])
weights = np.divide(weights, np.sum(weights))

# Calculate the popularity score
popularity_score = np.dot(normalized_data, weights)

# Add the popularity score to the DataFrame
data['popularity_score'] = popularity_score

# Classify the talks using K-means clustering
kmeans = KMeans(n_clusters=5, random_state=42)
data['popularity_category'] = kmeans.fit_predict(data[['popularity_score']])

# Calculate the average popularity score for each cluster
cluster_popularity = data.groupby('popularity_category')['popularity_score'].mean()

# Sort the clusters by popularity score in descending order
sorted_clusters = cluster_popularity.sort_values(ascending=False)

# Count the number of talks in each cluster
talks_per_cluster = data['popularity_category'].value_counts()

# Print the ranking of the clusters by popularity score
for i, cluster in enumerate(sorted_clusters.index):
    print(f"Cluster {cluster}: Rank {i+1}, {talks_per_cluster[cluster]} talks")

display(data)



Cluster 2: Rank 1, 22 talks
Cluster 1: Rank 2, 171 talks
Cluster 3: Rank 3, 386 talks
Cluster 4: Rank 4, 411 talks
Cluster 0: Rank 5, 234 talks


Unnamed: 0,title,FKRE_rating,FKRE_score,WPM,NAWL,NGSL,URL,seconds,view_count,like_count,transcript,raw_transcript,likes_per_view,total_responses,laughter,applause,cheering,popularity_score,popularity_category
0,Aaron Huey: America's native prisoners of war,Fairly easy,70.0,146.0,12,458,https://www.ted.com/talks/aaron_huey,916,1970692,59000,[{'sentence': 'I'm here today to show my photo...,i'm here today to show my photographs of the l...,0.029939,"{'laughter': 0, 'applause': 1, 'cheering': 0}",0,1,0,0.211961,4
1,"Abha Dawesar: Life in the ""digital now""",Fairly easy,74.7,169.0,16,435,https://www.ted.com/talks/abha_dawesar_life_in...,713,1369143,41000,[{'sentence': 'I was in New York during Hurric...,"i was in new york during hurricane sandy, and ...",0.029946,"{'laughter': 0, 'applause': 1, 'cheering': 0}",0,1,0,0.222254,4
2,Abraham Verghese: A doctor's touch,Fairly easy,70.1,170.0,41,526,https://www.ted.com/talks/abraham_verghese_a_d...,1100,1992577,59000,"[{'sentence': 'A few months ago, a 40 year-old...","a few months ago, a 40 year-old woman came to ...",0.029610,"{'laughter': 0, 'applause': 1, 'cheering': 0}",0,1,0,0.265658,3
3,Adam Davidson: What we learned from teetering ...,Plain English,61.1,165.0,24,546,https://www.ted.com/talks/adam_davidson_what_w...,1177,838052,25000,[{'sentence': 'So a friend of mine who's a pol...,so a friend of mine who's a political scientis...,0.029831,"{'laughter': 0, 'applause': 1, 'cheering': 0}",0,1,0,0.222199,4
4,"Adam Garone: Healthier men, one moustache at a...",Fairly easy,74.2,171.0,12,416,https://www.ted.com/talks/adam_garone_healthie...,989,755891,22000,[{'sentence': 'I think the beautiful Malin [Ak...,i think the beautiful malin [akerman] put it p...,0.029105,"{'laughter': 28, 'applause': 4, 'cheering': 0}",28,4,0,0.318285,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1219,Zahra' Langhi: Why Libya's revolution didn't w...,Difficult,48.2,104.0,18,255,https://www.ted.com/talks/zahra_langhi_why_lib...,576,561666,16000,"[{'sentence': 'I have never, ever forgotten th...","i have never, ever forgotten the words of my g...",0.028487,"{'laughter': 0, 'applause': 4, 'cheering': 0}",0,4,0,0.090970,0
1220,"Zainab Salbi: Women, wartime and the dream of ...",Fairly easy,74.1,128.0,14,413,https://www.ted.com/talks/zainab_salbi,1054,618890,18000,[{'sentence': 'I woke up in the middle of the ...,i woke up in the middle of the night with the ...,0.029084,"{'laughter': 1, 'applause': 1, 'cheering': 0}",1,1,0,0.231153,4
1221,Zak Ebrahim: I am the son of a terrorist. Here...,Plain English,67.6,152.0,11,368,https://www.ted.com/talks/zak_ebrahim_i_am_the...,545,6602165,198000,"[{'sentence': 'On November 5th, 1990, a man na...","on november 5th, 1990, a man named el-sayyid n...",0.029990,"{'laughter': 0, 'applause': 4, 'cheering': 0}",0,4,0,0.187294,0
1222,Zeresenay Alemseged: The search for humanity's...,Fairly easy,71.9,160.0,19,456,https://www.ted.com/talks/zeresenay_alemseged_...,943,1228159,36000,[{'sentence': 'I have 18 minutes to tell you w...,i have 18 minutes to tell you what happened ov...,0.029312,"{'laughter': 4, 'applause': 1, 'cheering': 0}",4,1,0,0.239127,4


In [63]:
#ciorna
#TODO: get it to work

output = subprocess.check_output(['java', '-cp', CORENLP_JAR, 'edu.stanford.nlp.pipeline.StanfordCoreNLP', '-annotators', 'tokenize', '-outputFormat', 'json', '-file', '-', '-'], input='This is a test sentence.'.encode())
print(output)

CalledProcessError: Command '['java', '-cp', 'models\\stanford-corenlp-4.5.4\\stanford-corenlp-4.5.4.jar', 'edu.stanford.nlp.pipeline.StanfordCoreNLP', '-annotators', 'tokenize', '-outputFormat', 'json', '-file', '-', '-']' returned non-zero exit status 1.

Use NLP techniques to extract extra features (WIP)

In [62]:
def compute_features(window_sentences):
    """Compute the features for a given window"""

    # Combine all sentences in the window
    window_text = ' '.join(window_sentences)
    
    # Compute the number of sentences in the window
    num_sentences = len(window_sentences)
    
    # Compute the mean length of clause
    output = subprocess.check_output(['java', '-cp', CORENLP_JAR, 'edu.stanford.nlp.pipeline.StanfordCoreNLP', '-annotators', 'tokenize,ssplit,pos,parse', '-outputFormat', 'json', '-file', '-', '-'], input=window_text.encode())
    output = output.decode()
    parse_trees = re.findall(r'"parse": "(.*?)"', output)
    clauses = []
    for parse_tree in parse_trees:
        clauses.extend(re.findall(r'\(S .*?\)', parse_tree))
    clause_lengths = [len(re.findall(r'\(VB.*?\)', clause)) + 1 for clause in clauses]
    mean_clause_length = sum(clause_lengths) / len(clause_lengths)
    
    # Compute clauses per sentence
    clauses_per_sentence = len(clauses) / num_sentences
    
    # Compute coordinate phrases per clause
    coordinate_phrases = []
    for parse_tree in parse_trees:
        coordinate_phrases.extend(re.findall(r'\(CC .*?\)', parse_tree))
    coordinate_phrases_per_clause = len(coordinate_phrases) / len(clauses)
    
    # Compute complex nominals per clause
    complex_nominals = []
    for parse_tree in parse_trees:
        complex_nominals.extend(re.findall(r'\(NP .*?SBAR', parse_tree))
    complex_nominals_per_clause = len(complex_nominals) / len(clauses)
    
    # Compute type-token ratio
    words = window_text.split()
    type_token_ratio = len(set(words)) / len(words)
    
    # Compute n-grams frequency features
    n_grams = [2, 3, 4, 5]
    n_grams_freq = {}
    for n in n_grams:
        n_grams_freq[f"{n}-"] = Counter(ngrams(words, n))
    
    return {
        'mean_clause_length': mean_clause_length,
        'clauses_per_sentence': clauses_per_sentence,
        'coordinate_phrases_per_clause': coordinate_phrases_per_clause,
        'complex_nominals_per_clause': complex_nominals_per_clause,
        'type_token_ratio': type_token_ratio,
        'n_grams_freq': n_grams_freq
    }

# Define the sliding window size
window_size = 5
window_features = []

# Iterate through the raw_transcript column
for index, row in data.iterrows():
    transcript = row['raw_transcript']
    
    # Tokenize the transcript into sentences
    sentences = sent_tokenize(transcript)

    # Initialize the window start and end indices
    start = 0
    end = window_size

    # Iterate through the sentences using the sliding window
    features_list = []
    while end <= len(sentences):
        # Extract the sentences in the current window
        window_sentences = sentences[start:end]

        # Compute the features for the current window
        features = compute_features(window_sentences)
        features_list.append(features)

        # Move the window one sentence forward
        start += 1
        end += 1

    # Store the features in the DataFrame
    window_features.append(features_list)

CalledProcessError: Command '['java', '-cp', 'models\\stanford-corenlp-4.5.4\\stanford-corenlp-4.5.4.jar', 'edu.stanford.nlp.pipeline.StanfordCoreNLP', '-annotators', 'tokenize,ssplit,pos,parse', '-outputFormat', 'json', '-file', '-', '-']' returned non-zero exit status 1.

In [59]:
features = ['FKRE_score', 'WPM', 'NAWL', 'NGSL', 'seconds', 'likes_per_view', 'laughter', 'applause', 'cheering', 'popularity_score']

# Create a list of cluster labels and corresponding number of talks
cluster_labels = []
num_talks = []
for cluster, talks_nr in talks_per_cluster.items():
    cluster_labels.append(cluster)
    num_talks.append(talks_nr)

# Create a list of indices for each cluster
cluster_indices = [np.where(data['popularity_category'] == label)[0] for label in cluster_labels]

# Split each cluster into training and testing sets
train_indices = []
test_indices = []
for indices in cluster_indices:
    # Select the features and target variable for this cluster
    X = data.loc[indices, features]
    y = data.loc[indices, 'popularity_category']

    # Normalize the features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Add the indices to the training and testing sets
    train_indices.extend(indices[X_train.astype(int)])
    test_indices.extend(indices[X_test.astype(int)])

# Print the number of talks in the training and testing sets
print(f"Number of talks in training set: {len(train_indices)}")
print(f"Number of talks in testing set: {len(test_indices)}")

Number of talks in training set: 976
Number of talks in testing set: 248


How to run CoreNLP Server

cd .\models\stanford-corenlp-4.5.4
java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 8080 -timeout 50000

CoreNLP paper:
Manning, Christopher D., Mihai Surdeanu, John Bauer, Jenny Finkel, Steven J. Bethard, and David McClosky. 2014. The Stanford CoreNLP Natural Language Processing Toolkit In Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics: System Demonstrations, pp. 55-60.

more ciorna

In [44]:
# Load the Stanford CoreNLP parser
parser = CoreNLPParser(url='http://localhost:8080')
dep_parser = CoreNLPDependencyParser(url='http://localhost:8080')

# Define the feature groups
syntactic_features = ['NP', 'VP', 'PP', 'SBAR', 'ADJP', 'ADVP', 'CONJP', 'FRAG', 'INTJ', 'LST', 'NAC', 'NX', 'PRN', 'PRT', 'QP', 'RRC', 'UCP', 'WHADJP', 'WHAVP', 'WHNP', 'WHPP']
lexical_features = ['LV', 'LS', 'LS1', 'LS2', 'LS3', 'LD', 'LSO1', 'LSO2', 'LSO3', 'LSS1', 'LSS', 'LSS3']
ngram_features = ['spoken', 'magazine', 'fiction', 'news', 'academic']

def compute_syntactic_features(parse_trees):
    syntactic_counts = Counter()
    for tree_iterator in parse_trees:
        for tree in tree_iterator:
            for subtree in tree.subtrees():
                if subtree.label() in syntactic_features:
                    syntactic_counts[subtree.label()] += 1
    return syntactic_counts

def compute_lexical_features(window):
    lexical_counts = Counter()
    for sentence in window:
        tokens = nltk.word_tokenize(sentence)
        pos_tags = nltk.pos_tag(tokens)
        for feature in lexical_features:
            if feature.startswith('LS'):
                pattern = ' '.join(feature.split('LS')[1].split('_'))
                matches = list(dep_parser.tregex(sentence, pattern))
                lexical_counts[feature] += len(matches)
            else:
                freq_dist = nltk.FreqDist([token.lower() for token, pos in pos_tags if pos.startswith(feature)])
                lexical_counts[feature] += freq_dist.B()
    return lexical_counts

def compute_ngram_features(window):
    ngram_counts = Counter()
    for feature in ngram_features:
        for sentence in window:
            tokens = nltk.word_tokenize(sentence)
            ngrams = nltk.ngrams(tokens, n=2)
            freq_dist = nltk.FreqDist([ngram for ngram in ngrams if ngram[0].lower() in feature.split('_')])
            ngram_counts[feature] += freq_dist.B()
    return ngram_counts

def extract_cocogen_features(text, ws=10):
    sentences = sent_tokenize(text)
    features = []

    for i in range(len(sentences) - ws + 1):
        window = sentences[i:i+ws]
        parse_trees = parser.parse_sents(window)

        syntactic_counts = compute_syntactic_features(parse_trees)
        lexical_counts = compute_lexical_features(window)
        ngram_counts = compute_ngram_features(window)

        features.extend(syntactic_counts.values())
        features.extend(lexical_counts.values())
        features.extend(ngram_counts.values())

    return features

# Example usage
text = "Thank you. It's really great to be here. I'm going to talk to you today about something that's very important to me, and that's the power of education."
features = extract_cocogen_features(text, 2)
print(features)

AttributeError: 'CoreNLPDependencyParser' object has no attribute 'tregex'

In [23]:
scaler = MinMaxScaler (feature_range = (0,1))

In [25]:
def score(item):
    reactions = {'applause': 3, 'laughter': 2, 'cheering': 1}
    total_score = 0
    for key in reactions:
        total_score += item['total_responses'][key] * reactions[key]
    return total_score


def extract_features_labels(corpus):
    X = []
    y = []
    for data in corpus:
        features = []
        features.append(data["FKRE_score"])
        # features.append(data["seconds"])
        features.append(data["NAWL"])
        features.append(data["NGSL"])
        features.append(data["WPM"])
        features.append(score(data))
        X.append(features)
        # Like count to view count ratio
        y.append(data["like_count"]/data["view_count"])
    X = np.array(X)
    y = np.array(y)

    X = scaler.fit_transform(X)
    y = scaler.fit_transform(y.reshape(-1,1))

    X = np.reshape(X, (X.shape[0], X.shape[1], 1))
    y = np.reshape(y, (y.shape[0], y.shape[1], 1))

    print("X shape: ", X.shape)
    print("y shape: ", y.shape)

    return X, y

X, y = extract_features_labels(corpus)

# Perform KMeans clustering with 2 clusters
kmeans = KMeans(n_clusters=2, random_state=0)
kmeans.fit(np.reshape(y, (y.shape[0], y.shape[1])))

# Get the cluster centroids
print(kmeans.cluster_centers_)
# Get the cluster labels
print(kmeans.labels_)

# Get the good and bad examples
good_examples_X = X[kmeans.labels_ == 0]
bad_examples_X = X[kmeans.labels_ == 1]

good_examples_y = y[kmeans.labels_ == 0]
bad_examples_y = y[kmeans.labels_ == 1]

print("Good examples: ", good_examples_X.shape, good_examples_y.shape)
print("Bad examples: ", bad_examples_X.shape, bad_examples_y.shape)

# Split the corpus into train and test sets
X_train_good, X_test_good, y_train_good, y_test_good = train_test_split(good_examples_X, good_examples_y, test_size=0.2, shuffle=False)
X_train_bad, X_test_bad, y_train_bad, y_test_bad = train_test_split(bad_examples_X, bad_examples_y, test_size=0.2, shuffle=False)

# Concatenate the good and bad examples
X_train = np.concatenate((X_train_good, X_train_bad))
X_test = np.concatenate((X_test_good, X_test_bad))
y_train = np.concatenate((y_train_good, y_train_bad))
y_test = np.concatenate((y_test_good, y_test_bad))

print("Train corpus size: ", len(X_train))
print("Test corpus size: ", len(X_test))

X shape:  (1224, 5, 1)
y shape:  (1224, 1, 1)
[[0.00753109]
 [0.98230349]]
[0 0 0 ... 0 0 0]
Good examples:  (1206, 5, 1) (1206, 1, 1)
Bad examples:  (18, 5, 1) (18, 1, 1)
Train corpus size:  978
Test corpus size:  246




In [26]:
regressor = Sequential ()
# TO DO Text vectorization
# TO DO Embedding layer
regressor.add(LSTM(units = 50, return_sequences= True, input_shape = (X_train.shape[1], 1)))
regressor.add(Dropout (0.2))
regressor.add(LSTM(units = 50, return_sequences= True))
regressor.add(Dropout (0.2))
regressor.add(LSTM(units = 50, return_sequences= True))
regressor.add(Dropout (0.2))
regressor.add(LSTM(units = 50))
regressor.add(Dropout (0.2))
regressor.add(Dense (units=1))

regressor.compile(optimizer='adam', loss='mean_squared_error')
regressor.fit(X_train, y_train, epochs=30, batch_size=32)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x2178038ed40>

In [27]:
# Predict
y_pred = regressor.predict(X_test)

# y_pred_normal = np.reshape(y_pred, (-1,1))
# y_test_normal = np.reshape(y_test, (-1,1))

# y_pred_normal = scaler.inverse_transform(y_pred_normal)
# y_test_normal = scaler.inverse_transform(y_test_normal)

y_pred_normal = np.reshape(y_pred, (y_pred.shape[0],))
y_test_normal = np.reshape(y_test, (y_test.shape[0],))

# Print predicted and actual values
for i in range(len(y_pred_normal)):
    print("Predicted: ", y_pred_normal[i], "Actual: ", y_test_normal[i])
print("MSE: ", mean_squared_error(y_test_normal, y_pred_normal))
#print("Accuracy: ", accuracy_score(y_test, y_pred))

Predicted:  0.017070211 Actual:  0.008906125406766371
Predicted:  0.017108977 Actual:  0.008252673949505407
Predicted:  0.01704553 Actual:  0.008651126644492912
Predicted:  0.017022764 Actual:  0.0064495105534462666
Predicted:  0.017080953 Actual:  0.007424899921620112
Predicted:  0.017112933 Actual:  0.0077193602609216955
Predicted:  0.0170466 Actual:  0.008633628298997859
Predicted:  0.017027833 Actual:  0.008969998704320747
Predicted:  0.017040895 Actual:  0.008489957294764636
Predicted:  0.017040372 Actual:  0.004763138820858839
Predicted:  0.017091447 Actual:  0.00782552300522349
Predicted:  0.01701183 Actual:  0.008905487497858022
Predicted:  0.01699476 Actual:  0.008366034950310963
Predicted:  0.017102268 Actual:  0.008860057944798841
Predicted:  0.01707813 Actual:  0.007162274583595141
Predicted:  0.017107287 Actual:  0.008332371504353647
Predicted:  0.017022897 Actual:  0.008920918040276754
Predicted:  0.017042425 Actual:  0.005503781981377132
Predicted:  0.01707918 Actual:  0