In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os,sys
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from cycler import cycler
import tensorflow as tf
import seaborn as sns
import pandas
import datetime
from imdb import IMDb
from mediawikiapi import MediaWikiAPI
from tqdm import tqdm

In [3]:
# Colour palettes available for plots; only one is installed as the default below.
color_cycle_wong = [
    '#000000', '#E69F00', '#CC79A7', '#56B4E9',
    '#009E73', '#0072B2', '#F0E442', '#D55E00',
]
Six_color_cycle = [
    "#5790FC", "#F89C20", "#E42536",
    "#964A8B", "#9C9CA1", "#7A21DD",
]
Eight_color_cycle = [
    "#1845FB", "#FF5E02", "#C91F16", "#C849A9",
    "#ADAD7D", "#86C8DD", "#578DFF", "#656364",
]
Ten_color_cycle = [
    "#3F90DA", "#FFA90E", "#BD1F01", "#94A4A2", "#832DB6",
    "#A96B59", "#E76300", "#B9AC70", "#717581", "#92DADD",
]

# Make the six-colour palette the default colour cycle for every matplotlib axes.
mpl.rcParams['axes.prop_cycle'] = cycler(color=Six_color_cycle)

In [4]:
# Column names, in order, of the saved movie selection array.
interesting_parameters = [
    'title',
    'genres',
    'year',
    'languages',
    'votes',
    'runtimes',
    'rating',
]

In [5]:
# Load the previously saved movie selection and label its columns.
# NOTE: allow_pickle=True unpickles Python objects stored in the file, which can
# execute arbitrary code -- only load files that come from a trusted source.
save_data_file = './Misc_Files/imdb_movie_selection_v1.npy'
loaded_data = np.load(save_data_file, allow_pickle=True)
movie_selection_text = pandas.DataFrame(
    data=loaded_data,
    columns=interesting_parameters,
)

In [6]:
# Display the loaded movie selection table as the cell output.
movie_selection_text

Unnamed: 0,title,genres,year,languages,votes,runtimes,rating
0,Tuscan Wedding,Comedy,2014,English,1790,104,5.2
1,The Four of the Apocalypse...,Fantasy,1975,Italian,2485,104,6.4
2,Knuckleball!,Documentary,2012,English,1686,93,7.1
3,The Farewell,Comedy,2019,Mandarin,56213,100,7.6
4,Johnny in the Clouds,Drama,1945,English,1441,109,7.3
...,...,...,...,...,...,...,...
5697,Summer Lovers,Comedy,1982,English,3971,98,5.7
5698,Matchstick Men,Comedy,2003,English,126764,116,7.3
5699,Sands of Iwo Jima,Action,1949,English,10065,100,7.1
5700,The Nutcracker,Family,1993,English,1968,92,5.8


### Convert `movie_selection_text` to numerical tensor
* Convert string columns to floats
* Drop the unused column (`title`)
* Separate the solution column (`rating`) and reshape it to match the grouped features
* Normalize each feature column by its maximum absolute value
* Reshape the N_movies x 5 feature matrix to (N_movies / N_groups) x N_groups x 5

In [7]:
def normalize_cols(df):
    """Scale every column of ``df`` by its maximum absolute value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are all numeric.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which each column has been divided by that
        column's largest absolute value, so values lie in [-1, 1].
        The input frame is not modified.

    Notes
    -----
    A column whose largest absolute value is 0 (i.e. all zeros) is returned
    unchanged; dividing it by 0 would turn the whole column into NaN.
    """
    normed_df = df.copy()
    for key in normed_df.keys():
        scale = normed_df[key].abs().max()
        if scale == 0:
            # All-zero column: already "normalized", and 0/0 would yield NaN.
            continue
        normed_df[key] = normed_df[key] / scale
    return normed_df

In [8]:
# Assign each category a float code by frequency rank: the most common
# genre / language gets 0.0, the next 1.0, and so on.
genres_by_frequency = movie_selection_text['genres'].value_counts().keys()
languages_by_frequency = movie_selection_text['languages'].value_counts().keys()

genre_map = {name: float(rank) for rank, name in enumerate(genres_by_frequency)}
language_map = {name: float(rank) for rank, name in enumerate(languages_by_frequency)}

In [9]:
# Build the numeric feature table from the text table.
movie_num_map = movie_selection_text.copy()

# Encode the categorical columns one at a time. Mapping by column (instead of a
# frame-wide replace) cannot touch values in unrelated columns and yields float
# columns directly rather than relying on implicit object-dtype downcasting.
movie_num_map['genres'] = movie_num_map['genres'].map(genre_map)
movie_num_map['languages'] = movie_num_map['languages'].map(language_map)

# The title is not used as a feature.
movie_num_map.pop('title')

# The rating is the quantity to predict, so it is kept apart from the features.
target = movie_num_map.pop('rating')

# Make sure every remaining feature column is numeric before normalizing
# (to_numeric raises if a value cannot be parsed as a number).
movie_num_map = movie_num_map.apply(pandas.to_numeric)
movie_num_map = normalize_cols(movie_num_map)

In [10]:
# Split the movies into consecutive groups of `n_groups` and label each group
# with the position (0 .. n_groups-1) of its highest-rated movie.
n_groups = 5
n_movies = np.shape(movie_num_map)[0]
trimmed_movies = n_movies % n_groups
# Slice by an explicit end index: `[:-trimmed_movies]` would return an empty
# array when the movie count is already a multiple of n_groups (remainder 0).
n_kept = n_movies - trimmed_movies

movie_num_map = movie_num_map.to_numpy()
print(f'trimmed {trimmed_movies} movies.')
# Shape (n_samples, n_groups, n_features): sample k holds movies
# k*n_groups .. k*n_groups + n_groups - 1.
movie_num_map = movie_num_map[:n_kept].reshape(-1, n_groups,
                                               np.shape(movie_num_map)[1])

target = target.to_numpy()
# Group the ratings with the SAME consecutive layout as the features, then
# transpose so that target[:, k] holds the ratings of sample k. Reshaping
# directly to (n_groups, -1) would instead pair movies that are n_samples rows
# apart, so the labels would not belong to the feature groups.
target = target[:n_kept].reshape(-1, n_groups).T
# argmax returns the first index of the maximum, i.e. ties go to the earliest movie.
winners = np.argmax(target, axis=0).astype(np.int32)

print(np.shape(target))
print(np.shape(winners))
print(np.shape(movie_num_map))

trimmed 2 movies.
(5, 1140)
(1140,)
(1140, 5, 5)


In [11]:
# Show the shape of the winner-index array (one label per group of movies).
winners.shape

(1140,)

# TensorFlow Start

In [12]:
# Pair every feature group with its winner index, then shuffle (buffer of 5000
# elements) and serve the pairs in batches of 32.
movie_num_map = (
    tf.data.Dataset.from_tensor_slices((movie_num_map, winners))
    .shuffle(5000)
    .batch(32)
)

In [13]:
# Inspect the (features, label) tensor specs of one dataset element.
movie_num_map.element_spec

(TensorSpec(shape=(None, 5, 5), dtype=tf.float64, name=None),
 TensorSpec(shape=(None,), dtype=tf.int32, name=None))

In [14]:
# Sanity check: walk the dataset once, printing each batch's feature shape and labels.
for batch_features, batch_labels in movie_num_map.as_numpy_iterator():
    print(np.shape(batch_features), batch_labels)

(32, 5, 5) [4 2 0 0 1 0 0 4 2 1 1 1 3 2 0 0 4 4 1 4 1 0 0 1 2 1 1 0 2 1 2 1]
(32, 5, 5) [1 1 2 4 1 2 4 0 0 2 1 2 1 0 4 3 3 2 4 2 0 0 2 0 4 2 1 4 4 3 4 0]
(32, 5, 5) [0 2 0 0 4 3 1 2 4 2 1 1 3 1 4 2 1 3 4 4 1 1 2 4 3 1 1 3 1 1 0 1]
(32, 5, 5) [4 1 2 0 4 0 4 0 3 1 3 0 0 3 1 0 2 2 3 0 3 0 1 4 0 3 4 0 4 1 2 4]
(32, 5, 5) [1 1 0 3 3 1 4 0 4 3 1 2 2 4 2 1 0 2 1 0 2 1 2 3 2 0 3 3 2 3 1 2]
(32, 5, 5) [0 1 2 3 0 3 0 0 0 3 1 4 0 4 3 1 4 1 2 4 4 0 0 3 1 2 2 3 2 1 4 0]
(32, 5, 5) [3 3 4 3 3 2 2 3 3 1 3 0 1 3 1 3 2 3 2 1 2 2 4 0 3 3 3 0 4 2 2 4]
(32, 5, 5) [1 3 0 2 3 3 3 1 4 2 1 1 4 3 1 4 2 3 4 1 3 1 4 4 1 3 2 2 3 0 1 2]
(32, 5, 5) [2 1 1 0 2 0 3 2 3 4 2 1 2 4 1 2 0 0 4 3 3 2 3 2 1 4 2 1 2 3 1 4]
(32, 5, 5) [2 2 2 2 4 3 4 0 0 1 2 3 3 2 1 2 4 2 1 0 2 2 3 2 3 2 0 4 0 2 3 1]
(32, 5, 5) [3 3 2 0 4 1 3 3 1 1 1 2 1 1 0 2 3 3 4 2 3 3 4 4 1 4 2 2 1 3 1 0]
(32, 5, 5) [2 0 2 1 2 0 0 4 1 0 2 3 2 0 2 1 0 3 0 3 3 3 1 1 4 0 0 3 0 3 2 3]
(32, 5, 5) [1 3 1 0 3 2 4 2 1 2 4 4 4 0 1 3 4 2 4 1 0 3 3 0 4 3 0 0 4 2 0 1]

In [15]:
# Classifier that predicts which movie in a group has the highest rating.
# The hidden layers need a non-linear activation: stacked Dense layers without
# one collapse into a single linear map. The output layer emits one logit per
# position in the group, so it has n_groups units (labels are 0 .. n_groups-1).
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(n_groups),
])
# The last layer has no softmax, so the loss must be told it receives logits.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
# The dataset is repeated indefinitely, so steps_per_epoch defines the epoch length.
history = model.fit(movie_num_map.repeat(), epochs=100, verbose=True, steps_per_epoch=20)
model.summary()

KeyboardInterrupt: 

In [None]:
# Alternative classifier with an explicit input shape and a softmax output.
# Each sample is one group of n_groups movies with 5 feature columns
# (genres, year, languages, votes, runtimes), matching the dataset's
# (None, 5, 5) element spec.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(n_groups, 5)),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    # One probability per position in the group.
    tf.keras.layers.Dense(n_groups, activation=tf.nn.softmax),
])
# The model already applies softmax, so the loss receives probabilities and
# from_logits must be False (True would apply softmax a second time).
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])
model.summary()

In [None]:
# Train on the shuffled, batched (features, winner index) dataset; one epoch is
# one full pass over the dataset.
history = model.fit(
    movie_num_map,
    epochs=100,
    verbose=True,
)

# TensorFlow Examples

```
celsius_q    = np.array([-40, -10,  0,  8, 15, 22,  38],  dtype=float)
fahrenheit_a = np.array([-40,  14, 32, 46, 59, 72, 100],  dtype=float)
l0 = tf.keras.layers.Dense(units=1, input_shape=[1]) 
model = tf.keras.Sequential([l0])
model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam(0.1))
model.summary()
```

```
tf.keras.utils.plot_model(model, show_shapes=True)
```

```
history = model.fit(celsius_q, fahrenheit_a, epochs=500, verbose=False)
model.predict([100.0])
```

## Transfer Learning