In [None]:
import os
# Work from the parent directory so that the `src` package imports and the
# relative `data/...` paths used further down resolve.
# Guarded on the `src` directory so that re-running this cell does not climb
# one directory higher on every execution.
if not os.path.isdir('src'):
    os.chdir('..')

In [None]:
import os
# Sanity check: show the entries of the current working directory.
os.listdir(os.curdir)

In [None]:
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
# Inject CSS so the notebook container uses the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Enable the autoreload extension so edits to imported project modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import torch
import torchvision
from pytorch_lightning.callbacks import ModelCheckpoint
from src.model.lit_module import LitModule
from src.data.dataset import VideoLabelDataset
import src.constants as const
from torch.utils.data import DataLoader
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from src.data.dataset import (VideoLabelDataset,
                              VideoFolderPathToTensor,
                              VideoResize)
import plotly
import plotly.express as px
import numpy as np
from ipywidgets import interact
import ipywidgets as widgets
import src.constants as const

In [None]:
import pandas as pd

In [None]:
# Transform pipeline applied by the dataset: VideoFolderPathToTensor followed
# by VideoResize(const.IMG_SIZE).
video_transform = torchvision.transforms.Compose([
    VideoFolderPathToTensor(),
    VideoResize(const.IMG_SIZE),
])
# Dataset built from the labels table at const.LABELS_TABLE_QA_PATH.
dataset = VideoLabelDataset(const.LABELS_TABLE_QA_PATH, img_transform=video_transform)
# Label table used by all the exploration cells below.
df = dataset.dataframe

In [None]:
# First five rows whose pipe_x lies above the 99th percentile of pipe_x.
df.loc[df['pipe_x'].gt(df['pipe_x'].quantile(0.99))].head(5)

In [None]:
# First five rows whose enemy_speed lies below the 10th percentile of enemy_speed.
df.loc[df['enemy_speed'].lt(df['enemy_speed'].quantile(0.1))].head(5)

In [None]:
# Does Mario run into the pipe? --> no
# Rows with mario_speed above its 90th percentile and pipe_x below its 15th percentile.
df.loc[
    df['mario_speed'].gt(df['mario_speed'].quantile(0.9))
    & df['pipe_x'].lt(df['pipe_x'].quantile(0.15))
].head(5)

In [None]:
# Does Mario run into the enemy? --> no
# Rows with mario_speed above its 90th percentile and enemy_speed below its 15th percentile.
df.loc[
    df['mario_speed'].gt(df['mario_speed'].quantile(0.9))
    & df['enemy_speed'].lt(df['enemy_speed'].quantile(0.15))
].head(5)

In [None]:
# Does the enemy run into the pipe? --> yes, but only slightly and only a few times.
# Rows with enemy_speed above its 90th percentile and pipe_x below its 10th percentile.
df.loc[
    df['enemy_speed'].gt(df['enemy_speed'].quantile(0.9))
    & df['pipe_x'].lt(df['pipe_x'].quantile(0.1))
].head(5)

In [None]:
# Is the pipe visible when Mario is slow and the pipe is far away? --> yes
# Rows with mario_speed below its 10th percentile and pipe_x above its 90th percentile.
df.loc[
    df['mario_speed'].lt(df['mario_speed'].quantile(0.1))
    & df['pipe_x'].gt(df['pipe_x'].quantile(0.9))
].head(5)

In [None]:
def _show_image_series(index):
    """Print the labels of one image series and display every second frame.

    The series is read from ``data/imgs_series_{const.DATA_VERSION}/{index:05d}``.
    Its files are loaded in sorted name order and shown as PNG image widgets
    in a horizontal box; the labels are taken from the first row of ``df``
    whose ``imgs_folder_path`` equals that directory.

    Args:
        index: Integer series number (formatted as a zero-padded 5-digit folder name).

    Raises:
        FileNotFoundError: If the series directory does not exist.
        IndexError: If no row of ``df`` refers to the series directory.
    """
    dir_path = f'data/imgs_series_{const.DATA_VERSION}/{index:05d}'
    frames = []
    for file_name in sorted(os.listdir(dir_path)):
        # Context manager so each file handle is closed right after reading.
        with open(f'{dir_path}/{file_name}', 'rb') as img_file:
            frames.append(widgets.Image(value=img_file.read(), format='png', width=200))

    # Filter the label table once instead of once per printed column.
    labels = df[df.imgs_folder_path == dir_path]
    print(f'mario speed: {labels.mario_speed.values[0]}'
          f', enemy speed: {labels.enemy_speed.values[0]}',
          f', box x: {labels.box_x.values[0]}',
          f', pipe x: {labels.pipe_x.values[0]}')
    # Only every second frame is shown.
    display(widgets.HBox(frames[::2]))


def plot_first_and_last_pic(index1, index2):
    """Show two image series one after the other for visual comparison.

    For each index the label values (mario speed, enemy speed, box x, pipe x)
    are printed, followed by a row of image widgets with every second frame
    of that series.

    Args:
        index1: Integer number of the first series to show.
        index2: Integer number of the second series to show.
    """
    for index in (index1, index2):
        _show_image_series(index)
    
# Both dropdowns offer the series numbers 1 .. len(df) - 2.
# NOTE(review): 0 and len(df) - 1 are not selectable - confirm against the folder numbering.
series_range = range(1, len(df) - 1)
interact(
    plot_first_and_last_pic,
    index1=list(series_range),
    index2=list(series_range),
)

In [None]:
# One histogram per hidden-state column, laid out side by side in a single row.
# The number of subplot columns follows const.HIDDEN_STATE_COLS instead of a
# hard-coded count, so the grid always matches the loop below.
fig = make_subplots(rows=1, cols=len(const.HIDDEN_STATE_COLS))
for col_idx, col_name in enumerate(const.HIDDEN_STATE_COLS, start=1):
    fig.add_trace(go.Histogram(x=df[col_name].values), row=1, col=col_idx)
    fig.update_xaxes(title_text=col_name, row=1, col=col_idx)

fig.update_layout(barmode='overlay', showlegend=False, title_text="Hidden state & question distributions")
# Only the left-most subplot carries the y-axis label.
fig.update_yaxes(title_text="Frequency", row=1, col=1)
fig.update_traces(opacity=0.5)
fig.show()

In [None]:
# One histogram trace per optimal-answer column, all drawn on the same axes.
fig = go.Figure(
    data=[go.Histogram(x=df[answer_col].values, name=answer_col)
          for answer_col in const.ANSWER_COLS]
)

# Overlay the histograms and make them semi-transparent so all stay visible.
fig.update_layout(barmode='overlay', title_text="Optimal answer distributions")
fig.update_traces(opacity=0.5)
fig.update_xaxes(title_text="Optimal answer value")
fig.update_yaxes(title_text="Frequency")
fig.show()

In [None]:
answer_list = const.ANSWER_COLS
# Pair of hidden-state columns plotted against the answer at the same position.
# NOTE(review): zip() stops at the shorter list - confirm there is one pair per answer column.
features_list = [['box_x', 'mario_speed'], ['pipe_x', 'enemy_speed'], ['mario_speed', 'pipe_x'], ['mario_speed', 'enemy_speed']]
for answer, features in zip(answer_list, features_list):
    # One figure per answer: left subplot for the first feature, right for the second.
    fig = make_subplots(rows=1, cols=2)
    for col_idx, feature in enumerate(features, start=1):
        fig.add_trace(go.Scatter(x=df[feature], y=df[answer], mode='markers'), row=1, col=col_idx)
        # The x axis shows the feature, so it is labelled with the feature name.
        fig.update_xaxes(title_text=feature, row=1, col=col_idx)
    fig.update_layout(title_text="Optimal answer values over hidden states")
    fig.update_yaxes(title_text=answer)
    fig.update_traces(opacity=0.5)
    fig.show()

In [None]:
# Plot 1/x for x = 1 .. 99.
# The range starts at 1: including 0 would divide by zero, which emits a
# RuntimeWarning and produces an infinite first value.
x = np.arange(1, 100)
y = 1/x
import matplotlib.pyplot as plt
plt.plot(x,y)

In [None]:
# Plot 2/x for x = 1 .. 99.
# The range starts at 1: including 0 would divide by zero, which emits a
# RuntimeWarning and produces an infinite first value.
x = np.arange(1, 100)
y = 2/x
import matplotlib.pyplot as plt
plt.plot(x,y)

In [None]:
import src.constants as const
def plot_answer(answer_col, col0, col1):
    """Show a 3D scatter plot of one column of ``df`` over two others.

    Args:
        answer_col: Column of ``df`` used for the z axis and for the marker colour.
        col0: Column of ``df`` used for the x axis.
        col1: Column of ``df`` used for the y axis.
    """
    scatter = px.scatter_3d(df, x=col0, y=col1, z=answer_col, color=answer_col)
    scatter.show()
# Dropdowns: both axes choose from the hidden-state columns, the plotted
# value chooses from the answer columns.
interact(
    plot_answer,
    col0=const.HIDDEN_STATE_COLS,
    col1=const.HIDDEN_STATE_COLS,
    answer_col=const.ANSWER_COLS,
)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Integers 1 .. 99. Zero is left out because this array is used as a divisor
# (1/x) in the next cell, where 0 would yield a divide-by-zero warning and inf.
x = np.arange(1, 100)

In [None]:
# Element-wise reciprocal of x (x comes from the preceding cell), drawn as a scatter plot.
y = np.true_divide(1, x)
plt.scatter(x, y)