# IMPORTS

In [None]:
import pyaudio
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import time
import wave
import librosa

import torch
from torch.nn import Sequential, Conv1d, ReLU, MaxPool1d, Dropout, Linear, Flatten, CrossEntropyLoss, ConstantPad1d

import itertools
from ipywidgets import interact, Button, IntText, GridBox, Layout, VBox, HBox, HTML, Output, Label
from IPython.display import display
from bokeh.io import push_notebook, show, output_notebook
from bokeh.layouts import row, column, layout
from bokeh.models import CategoricalColorMapper, ColumnDataSource, Text
from bokeh.plotting import figure, output_file
from GameGrid import Grid

# MODEL

In [None]:
# Maps the index of the model's highest-scoring output to the spoken direction.
labels_to_classes = dict(enumerate(('up', 'down', 'left', 'right')))

In [None]:
# 1-D CNN over a raw waveform: four identical Conv1d -> ReLU -> MaxPool1d stages
# followed by a three-layer fully connected head that emits 4 logits.
# Everything stays in ONE flat Sequential so the positional state_dict keys
# ('0.weight', '3.weight', ..., '19.bias') are unchanged.
_conv_in_channels = (1, 64, 64, 64)
_feature_layers = [
    layer
    for n_in in _conv_in_channels
    for layer in (Conv1d(n_in, 64, 13), ReLU(), MaxPool1d(4))
]
_classifier_layers = [
    Flatten(),
    # 64 channels * 27 time steps = 1728 features for an 8000-sample input
    Linear(1728, 256), ReLU(), Dropout(0.3),
    Linear(256, 32), ReLU(), Dropout(0.3),
    Linear(32, 4),
]
model = Sequential(*_feature_layers, *_classifier_layers)

In [None]:
# Restore the trained parameters, mapping every tensor onto the CPU.
# NOTE(review): torch.load unpickles the file - only load weights from a trusted source.
_saved_state = torch.load('saved_weights.pt', map_location='cpu')
model.load_state_dict(_saved_state)

<All keys matched successfully>

# AUDIO FUNCTIONS

In [None]:
# Audio capture settings shared by the input stream, the recorder and the WAV writer.
audio_format = pyaudio.paInt16  # sample format handed to PyAudio
channels = 1                    # number of audio channels
sample_rate = 16_000            # capture rate in Hz
chunk = 1024                    # frames pulled from the stream per read
record_duration = 1             # seconds captured once voice is detected
threshold = 500                 # mean absolute amplitude a frame must exceed to count as voice

In [None]:
# Open an input (capture) stream configured from the constants above.
pa = pyaudio.PyAudio()
stream = pa.open(format=audio_format, channels=channels, rate=sample_rate,
                 input=True, frames_per_buffer=chunk)

In [None]:
# CHECKING IF AUDIO IS MORE THAN THRESHOLD
def detect_voice(frame, level=None):
    """Return True when the frame's mean absolute amplitude exceeds the threshold.

    Parameters
    ----------
    frame : array-like
        Audio samples; the listening loop passes an int16 NumPy array.
    level : number, optional
        Amplitude to compare against. Defaults to the module-level ``threshold``.

    Returns
    -------
    bool
        True if the frame is loud enough to count as voice, otherwise False.
        An empty frame is never treated as voice.
    """
    if level is None:
        level = threshold

    # Widen before taking abs(): in int16, abs(-32768) overflows back to -32768,
    # which would make a fully clipped frame look silent.
    samples = np.asarray(frame, dtype=np.float64)
    if samples.size == 0:
        return False

    return bool(np.mean(np.abs(samples)) > level)

In [None]:
# WRITE EACH RECORDING TO FILE
def write_to_file(recording):
    """Write raw audio bytes to 'temp.wav' and return that filename.

    The WAV header is filled in from the module-level capture settings
    (``channels``, ``audio_format``, ``sample_rate``), so ``recording`` must
    have been captured with those same settings.

    Parameters
    ----------
    recording : bytes
        Concatenated frames as returned by ``stream.read``.

    Returns
    -------
    str
        The name of the file that was written; it is overwritten on every call.
    """
    filename = 'temp.wav'

    # The context manager closes the file even if one of the calls below raises,
    # so a failed write no longer leaks the handle.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(pa.get_sample_size(audio_format))
        wf.setframerate(sample_rate)
        wf.writeframes(recording)

    return filename

In [None]:
def predict(filename):
    """Classify the spoken command in an audio file and apply it to the game.

    The clip is loaded with librosa, forced to exactly one second, resampled
    to 8000 Hz, min-max normalised and fed to ``model``. The predicted
    direction is passed to ``on_button_clicked`` and then printed.

    Parameters
    ----------
    filename : str
        Path of the audio file to classify.

    Returns
    -------
    None
    """
    # 8000 input samples produce the 1728 features the first Linear layer expects.
    target_rate = 8000

    # librosa.load is called with its default settings, so it returns the clip
    # at librosa's own rate. That rate is kept in a local name so the
    # module-level `sample_rate` is not shadowed.
    # NOTE(review): the default load rate is kept on the assumption that
    # training used the same pipeline - confirm.
    waveform, loaded_rate = librosa.load(filename)

    # Force exactly one second of audio: truncate long clips, zero-pad short ones.
    # np.pad is used because the waveform is still a NumPy array here
    # (torch's ConstantPad1d only accepts tensors).
    n_samples = waveform.shape[0]
    if n_samples > loaded_rate:
        waveform = waveform[:loaded_rate]
    else:
        waveform = np.pad(waveform, (0, loaded_rate - n_samples), mode='constant')

    # Keyword arguments work on both old and new librosa releases.
    waveform = librosa.resample(waveform, orig_sr=loaded_rate, target_sr=target_rate)

    # Min-max normalise to [0, 1]; skip the division for a constant signal to avoid 0/0.
    waveform = waveform - waveform.min()
    peak = waveform.max()
    if peak > 0:
        waveform = waveform / peak

    # Shape (batch=1, channels=1, samples) as required by the first Conv1d.
    batch = torch.from_numpy(
        np.ascontiguousarray(waveform, dtype=np.float32)
    ).reshape(1, 1, -1)

    model.eval()  # disable dropout for inference
    with torch.no_grad():
        logits = model(batch)

    # argmax of the logits equals argmax of their log-softmax.
    label = labels_to_classes[int(logits.argmax(dim=1)[0])]

    on_button_clicked(label)
    print('You said', label)

In [None]:
def record():
    """Capture ``record_duration`` seconds from the stream and classify them.

    Reads from the module-level ``stream``, writes the audio with
    ``write_to_file`` and hands the resulting file to ``predict``.

    Returns
    -------
    None
    """
    print('Voice detected - begin to record')

    # Read a fixed number of chunks instead of polling the wall clock, so the
    # clip length no longer depends on how fast the loop happens to run.
    # Rounded up so the clip is never shorter than `record_duration` seconds,
    # and at least one chunk is always read.
    total_frames = int(sample_rate * record_duration)
    n_chunks = max(1, -(-total_frames // chunk))
    waveform = [stream.read(chunk) for _ in range(n_chunks)]

    filename = write_to_file(b''.join(waveform))
    predict(filename)
    print('Return to listening\n\n')

# PLAY

In [None]:
# Board rendering adapted from https://github.com/zvapa/2048-game-python-jupyter
output_notebook()

g = Grid(4, 4)

# shared by the plot outline and the tile borders
line_width = 10
line_color = '#B8AD9A'

# y_range runs from 3.5 down to -0.5, so y = 0 is drawn at the top of the plot
p = figure(
    tools='',
    title=f"Score: {g.score}",
    plot_width=400,
    plot_height=400,
    x_range=(-0.5, 3.5),
    y_range=(3.5, -0.5),
)

# style the figure so it reads as a game board rather than a chart
p.outline_line_width = line_width
p.outline_line_color = line_color
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.title.align = 'right'
p.title.text_font_size = '1.5em'
p.axis.visible = False

# tile value -> fill colour
color_dict = {
    0: '#D7D7D7',
    2: '#ECE3AF',
    4: '#E0D1A1',
    8: '#D3B486',
    16: '#F0AB79',
    32: '#F18231',
    64: '#F73B0C',
    128: '#DAD083',
    256: '#D6C01E',
    512: '#D4E80F',
    1024: '#A3D829',
    2048: '#30D55A',
    4096: '#B356E5',
    8192: '#37E5E7',
    16384: '#5117EA',
    32768: '#C13695',
    65536: '#468A9C',
    131072: '#9C4657',
}

# every (i, j) index pair of the 4x4 grid, plus the same pairs unzipped
coord_pairs = list(itertools.product(range(4), range(4)))
x, y = zip(*coord_pairs)

def update_source():
    """Call ``g.spawn()`` and return the column data describing the whole board.

    Note that every call mutates the grid through ``g.spawn()`` before the
    board is read, including the call that builds the initial data source.

    Returns
    -------
    dict
        Keys ``x``, ``y``, ``vals``, ``tile_colors`` and ``text_colors``, each
        holding one entry per cell in ``coord_pairs`` order. ``x``/``y`` are
        the swapped module-level ``y``/``x`` tuples.
    """
    g.spawn()

    # Convert the grid once instead of once per cell.
    board = g.to_numpy()
    vals = [board[c] for c in coord_pairs]
    tile_colors = [color_dict[v] for v in vals]
    # Empty (zero) tiles get text in their own fill colour, which hides the "0".
    text_colors = ['#D7D7D7' if v == 0 else 'black' for v in vals]

    return dict(x=y, y=x, vals=vals, tile_colors=tile_colors, text_colors=text_colors)

# Data source in its initial state; both glyphs below read from it.
source = ColumnDataSource(update_source())

# tiles: one unit square per cell, filled according to its value
p.rect(
    x='x', y='y', width=1, height=1,
    fill_color='tile_colors',
    line_color=line_color, line_width=line_width,
    source=source,
)

# tile values, centred on each square
p.text(
    x='x', y='y', text='vals',
    text_align='center', text_color='text_colors',
    source=source,
)

# Apply one move to the grid and, if it changed anything, rebuild the data source.
def on_button_clicked(b):
    """Slide the grid in direction ``b`` and refresh the displayed board.

    Parameters
    ----------
    b : str
        Direction to slide; ``predict`` passes one of the values of
        ``labels_to_classes``.

    Returns
    -------
    None
    """
    direction = b
    moved = g.slide(direction)
    if not moved:
        return  # the move left the grid unchanged, nothing to redraw

    source.data = update_source()
    p.title.text = f"Score: {g.score}"
    push_notebook()

    if not g.game_over():
        return
    print("Game Over")
    print("Re-run this cell to start again.")
            
# Display the board. notebook_handle=True asks Bokeh for a handle to this output,
# which the push_notebook() call in on_button_clicked relies on to update it in place.
show(p, notebook_handle=True)

In [None]:
# Listen until the kernel is interrupted: every loud frame triggers one
# record -> classify -> move cycle.
print('Start listening ...')
try:
    while True:
        # exception_on_overflow=False: audio dropped while a prediction was
        # running must not abort the loop with an input-overflow error.
        frame = np.frombuffer(
            stream.read(chunk, exception_on_overflow=False), dtype=np.int16
        )
        if detect_voice(frame):
            record()
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to stop; end the cell
    # cleanly so the teardown cell below can still run.
    print('Stopped listening')

Start listening ...
Voice detected - begin to record
You said down
Return to listening


Voice detected - begin to record
You said up
Return to listening


Voice detected - begin to record
You said down
Return to listening


Voice detected - begin to record
You said left
Return to listening


Voice detected - begin to record
You said right
Return to listening




KeyboardInterrupt: 

In [None]:
# Release the audio resources opened earlier in the notebook: stop and close
# the stream first, then terminate the PyAudio instance that created it.
stream.stop_stream()
stream.close()
pa.terminate()