# Speech-to-Speech Abstractive Summarisation

This project uses the [How2 dataset](https://srvk.github.io/how2-dataset/), a collection of instructional YouTube videos with their video IDs, English subtitles and human-made summaries. The project is concerned with audio-to-audio transformation, so the audio track has been extracted from each video.

* Task A) Construct databases of video transcriptions and human made descriptions
* Task B) Download audios of 400 videos
* Task C) Align transcriptions to the audios 
* Task D) Sequence Labelling and Feature Significance
* Task E) Audio Generation

In [None]:
# Load the test dataset into a pandas data frame
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
import spacy
import scipy.io
import math
import pickle 
import Levenshtein
from sklearn import preprocessing
from pydub import AudioSegment
from pydub.playback import play
from gensim.summarization.summarizer import summarize as extractive_sum
from rouge import Rouge 
from allennlp.data.tokenizers import Token, Tokenizer, SpacyTokenizer
from statistics import stdev 

rouge = Rouge()
nlp = spacy.load("en_core_web_sm")

### Task A) Construct databases of video transcriptions and human made descriptions

In [None]:

# Load the descriptions into a pandas data frame: one raw line per row in
# column "a" (video id followed by the tokenised description).
# NOTE(review): read_csv uses the default "," separator here - confirm that
# descriptions containing commas are parsed as intended.
desctable = pd.read_csv('speech_data/text/sum_cv/desc.tok.txt', header=None, skipinitialspace=True, names=["a"])
desctable.head()
filename = 'speech_data/text/sum_cv/tran.tok.txt'
with open(filename, 'r') as f:
    # One transcription per line; reading line by line avoids the "%%%"
    # separator round-trip through read_csv and never yields a trailing
    # empty entry for the final newline.
    data = [line.rstrip('\n') for line in f]

# Load the transcriptions into a pandas data frame with the same
# single-column layout as desctable.
trantable = pd.DataFrame({'a': data})

In [None]:
# Split id and description: the id is everything before the first space and
# the description is everything after it.  str() mirrors the original handling
# of non-string cells (e.g. NaN becomes "nan").
desctable['id'] = desctable['a'].map(lambda cell: str(cell).partition(" ")[0])
desctable['desc'] = desctable['a'].map(lambda cell: str(cell).partition(" ")[2])
# `columns=` instead of a positional axis: pandas 2.0 no longer accepts drop('a', 1).
desctable.drop(columns='a', inplace=True)

# Split id and transcription in the same way.
trantable['id'] = trantable['a'].map(lambda cell: cell.partition(" ")[0])
trantable['tran'] = trantable['a'].map(lambda cell: cell.partition(" ")[2])
trantable.drop(columns='a', inplace=True)

# Join tables on ID to create a single table
conctable = pd.merge(desctable, trantable, on="id")
conctable.head()

### Task B) Download audios of 400 videos

In [None]:
# # Install dependencies to get audio from YouTube
# !pip -q install wget youtube-dl wget 

In [None]:
# # Loop over the 400 YouTube videos
# # Save each video's audio as 8000Hz wav
# for YOUTUBE_ID in shortlist:
#     !youtube-dl --extract-audio --audio-format wav --quiet --output "{YOUTUBE_ID}_FULL.%(ext)s" https://www.youtube.com/watch\?v\={YOUTUBE_ID}
#     !ffmpeg -loglevel panic -y -i {YOUTUBE_ID}_FULL.wav -acodec pcm_s16le -ac 1 -ar 8000 {YOUTUBE_ID}.wav
#     !rm {YOUTUBE_ID}_FULL.wav

In [None]:
# # Take the first 400 YouTube video IDs
# youtube_ids = conctable['id'].tolist()
# shortlist = youtube_ids[:400]

In [None]:
exploretable = conctable

In [None]:
def _token_overlap(desc, tran):
    """Return (shared, missing): description tokens found / not found in the transcription."""
    # Build the transcription vocabulary once so each membership test is O(1)
    # instead of re-splitting the whole transcription for every token.
    tran_vocab = set(tran.split(" "))
    desc_tokens = desc.split(" ")
    shared = [token for token in desc_tokens if token in tran_vocab]
    missing = [token for token in desc_tokens if token not in tran_vocab]
    return shared, missing


_overlaps = [_token_overlap(desc, tran) for desc, tran in zip(exploretable['desc'], exploretable['tran'])]
# Wrap in a Series carrying the table's index so each list lands in one cell of its own row.
exploretable['intersection'] = pd.Series([shared for shared, _ in _overlaps], index=exploretable.index)
exploretable['descnottran'] = pd.Series([missing for _, missing in _overlaps], index=exploretable.index)

In [None]:
# Calculate ROUGE statistics for the video descriptions and the video transcriptions.
# Every (description, transcription) pair is scored once and all nine columns
# are filled from that single result, instead of re-running ROUGE per column.
_row_scores = [
    rouge.get_scores(desc, tran)[0]
    for desc, tran in zip(exploretable['desc'], exploretable['tran'])
]
for _metric, _column in (('rouge-1', 'rouge1'), ('rouge-2', 'rouge2'), ('rouge-l', 'rougel')):
    for _stat in ('f', 'r', 'p'):
        exploretable[f'{_column}-{_stat}'] = pd.Series(
            [scores[_metric][_stat] for scores in _row_scores],
            index=exploretable.index,
        )

In [None]:
import os
from os.path import isfile, join
# Flag every video for which ./speech_audios/<id>.wav exists on disk.
exploretable['downloaded'] = exploretable['id'].map(
    lambda video_id: os.path.isfile(f'./speech_audios/{video_id}.wav')
)

In [None]:
from datetime import datetime
# Pickle the table under a file name stamped with today's date (DDMMYYYY).
dt_string = f"{datetime.now():%d%m%Y}"
exploretable.to_pickle(f'./exploretable{dt_string}.pkl')

### Task C) Align transcriptions to the audios

In [None]:
# Alignment of transcription to audio
# Define imports for Kaldi Alignment
from kaldi.alignment import NnetAligner
from kaldi.fstext import SymbolTable
from kaldi.lat.align import WordBoundaryInfoNewOpts, WordBoundaryInfo
from kaldi.nnet3 import NnetSimpleComputationOptions
from kaldi.util.table import SequentialMatrixReader
import string

In [None]:
def aspire_alignment():
    """Force-align every utterance in data/test and save the word alignments.

    Builds an ``NnetAligner`` from the model files under
    ``exp/tdnn_7b_chain_online`` and the language files under ``data/lang``,
    aligns each utterance listed in ``data/test/wav.scp`` against its line in
    ``data/test/text`` and writes the word alignment to
    ``alignments/<utterance id>.txt``.

    Returns:
        A list of ``(utterance id, word alignment)`` pairs, one per aligned
        utterance, in processing order.

    Raises:
        ValueError: if the feature, i-vector and text streams disagree on the
            utterance id, i.e. the three inputs are not in the same order.
    """
    # Construct aligner
    decodable_opts = NnetSimpleComputationOptions()
    decodable_opts.acoustic_scale = 1.0
    decodable_opts.frames_per_chunk = 150
    aligner = NnetAligner.from_files(
        "exp/tdnn_7b_chain_online/final.mdl",
        "exp/tdnn_7b_chain_online/tree",
        "data/lang/L.fst",
        "data/lang/words.txt",
        "data/lang/phones/disambig.int",
        decodable_opts=decodable_opts)
    phones = SymbolTable.read_text("data/lang/phones.txt")
    wb_info = WordBoundaryInfo.from_file(WordBoundaryInfoNewOpts(),
                                         "data/lang/phones/word_boundary.int")

    # Define feature pipelines as Kaldi rspecifiers
    feats_rspec = (
        "ark:compute-mfcc-feats --config=conf/mfcc_hires.conf scp:data/test/wav.scp ark:- |"
    )
    ivectors_rspec = (
        "ark:compute-mfcc-feats --config=conf/mfcc_hires.conf scp:data/test/wav.scp ark:- |"
        "ivector-extract-online2 --config=conf/ivector_extractor.conf ark:data/test/spk2utt ark:- ark:- |"
    )

    # Punctuation is stripped from every transcription before alignment.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    alignments = []
    # Align wav files
    with SequentialMatrixReader(feats_rspec) as feats_reader, \
         SequentialMatrixReader(ivectors_rspec) as ivectors_reader, \
         open("data/test/text", "r") as text_file:
        for (fkey, feats), (ikey, ivectors), line in zip(feats_reader, ivectors_reader, text_file):
            tkey, text = line.strip().split(None, 1)
            text = text.translate(strip_punctuation)
            # Explicit check instead of assert, which is stripped under -O.
            if not (fkey == ikey == tkey):
                raise ValueError(
                    f"utterance id mismatch: feats={fkey!r}, ivectors={ikey!r}, text={tkey!r}")
            out = aligner.align((feats, ivectors), text)
            word_alignment = aligner.to_word_alignment(out["best_path"], wb_info)

            # A separate name for the output handle: the original reused `f`,
            # rebinding the name of the feature reader still being iterated.
            with open(f'alignments/{tkey}.txt', 'w') as out_file:
                print(f"{word_alignment}", file=out_file)
            alignments.append((tkey, word_alignment))
    return alignments

#### File structure within the /data folder is as follows :
-    /lang
-    /test
-        spk2utt - maps each speaker to its utterances; here every utterance is treated as its own speaker, so each line repeats the unique id twice (e.g. `utt1 utt1`)
-        text - transcription for each utterance on each line 
-        utt1.wav
-        wav.scp

In [None]:
def generateAlignmentDeps(table):
    """Write the Kaldi input files spk2utt, text and wav.scp under data/test/.

    Only rows of ``table`` whose ``downloaded`` column is True are used; their
    ``id`` and ``tran`` columns supply the utterance ids and transcriptions.
    """
    available = table.query('downloaded==True')
    transcriptions = available['tran'].tolist()
    utterance_ids = available['id'].tolist()

    def write_lines(path, lines):
        # One entry per line, each terminated by a newline.
        with open(path, 'w') as handle:
            for entry in lines:
                print(entry, file=handle)

    # spk2utt: every utterance id is listed as its own speaker.
    write_lines("data/test/spk2utt", (f"{utt} {utt}" for utt in utterance_ids))
    # text: utterance id followed by its transcription.
    write_lines("data/test/text",
                (f"{utt} {tran}" for utt, tran in zip(utterance_ids, transcriptions)))
    # wav.scp: utterance id followed by the path of its audio file.
    write_lines("data/test/wav.scp",
                (f"{utt} speech_audios/{utt}.wav" for utt in utterance_ids))

generateAlignmentDeps(exploretable)

In [None]:
# Ensure that you have run alignment_dependencies/path.sh in order to add Kaldi to the PATH

#HACKY FIX IMPLEMENTED 
#IN PyKaldi API, if word not found in symbol table (out of vocabulary) it is set to <unk> or index 16. 
# The effect of this upon results needs to be discussed
aspire_alignment();

### Task D) Sequence Labelling and Feature Significance

#### Data Loading

WAV file -> 45-dimension feature vector
*    The WAV file is analysed in 100ms frames
*    For each 100ms frame a feature vector is generated containing the following information:
*    The min, max, median, mean and range of the pitch (based on 10ms subframes)
*    The min, max, median, mean and range of the energy (based on 10ms subframes)
*    Mel Cepstral Coefficients + 1st and 2nd derivatives

Frame * 100
Utterance Length stays the same
Alignment * 10

| Index |                 Feature                |
|:-----:|:--------------------------------------:|
|   0   | VAD                                    |
|   1   | Pitch - Low                            |
|   2   | Pitch - High                           |
|   3   | Pitch - Median                         |
|   4   | Pitch - Mean                           |
|   5   | Pitch - Range                          |
|  6-17 | Cepstral Coefficients 1-12             |
|   18  | Energy                                 |
| 19-31 | 1st Diff of Cepstral Coefficients 1-12 |
|   32  | 1st Diff of Energy                     |
| 33-44 | 2nd Diff of Cepstral Coefficients 1-12 |
|   45  | 2nd Diff of Energy                     |


In [None]:
from os import listdir
from os.path import isfile, join
# Names of all .mat files in the acoustic feature folder.
matfiles = list(filter(lambda name: name.endswith('.mat'), listdir("./acoustic_feats_170520/")))
# Reload the pickled table snapshot exploretable16052020.pkl.
exploretable = pd.read_pickle("./exploretable16052020.pkl")

In [None]:
import ast
from stop_words import get_stop_words
stop_words = get_stop_words('en')
stop_word_set = set(stop_words)  # O(1) membership tests in the filter below


def _ask(question):
    """Prompt with ``question``; any answer other than 'n'/'N' counts as yes."""
    return input(question).lower() != 'n'


# Feature columns to keep, chosen interactively (each group defaults to yes).
# NOTE(review): these ranges do not line up exactly with the feature table in
# the markdown cell above (e.g. columns 6-19 here vs 6-18 there) - confirm.
selected_indexes = []
if _ask("Include Voice Activity Detection? Y/n?"):
    selected_indexes.append(0)
if _ask("Include Pitch Statistics? Y/n?"):
    selected_indexes.extend(range(1, 6))
if _ask("Include mel? Y/n?"):
    selected_indexes.extend(range(6, 20))
if _ask("Include 1st diff of mel? Y/n?"):
    selected_indexes.extend(range(20, 34))
if _ask("Include 2nd diff of mel? Y/n?"):
    selected_indexes.extend(range(34, 45))

print(selected_indexes)

FEATUREPATH = "./acoustic_feats_170520/"
AUDIOPATH = "./speech_audios/"
ALIGNMENTPATH = "alignments/"

# Frames belonging to "important" words are positives, all other frames of the
# same file are negatives.  A word is important when it occurs in both the
# description and the transcription and is not a stop word.
positives = []
negatives = []
for matfile in matfiles[:30]:
    try:
        feats = scipy.io.loadmat(FEATUREPATH + matfile)['ret']
        feats = feats[:, selected_indexes]
        chosenid = matfile.replace(".mat", "")
        # NOTE(review): audiofile is not used below; loading it only has the
        # effect of skipping this file when the WAV is missing or unreadable.
        audiofile = AudioSegment.from_wav(f'{AUDIOPATH}{chosenid}.wav')
        # literal_eval parses the saved list literal without executing code,
        # and the with-block closes the file.
        with open(f'{ALIGNMENTPATH}{chosenid}.txt', "r") as alignment_file:
            alignment = ast.literal_eval(alignment_file.read())
        row = exploretable.query(f'id=="{chosenid}"')
        description = set(row['desc'].values[0].split(" "))
        transcription = set(row['tran'].values[0].split(" "))
        intersection = description & transcription

        importantwords = [
            entry for entry in alignment
            if entry[0] in intersection and entry[0] not in stop_word_set
        ]

        impidx = []
        for imp in importantwords:
            # Map the word's alignment start/length (divided by 10) to feature rows.
            start = math.ceil((imp[1]) / 10)
            gap = math.floor((imp[2] / 10 - (imp[2] % 1)))
            end = start + gap + 1
            impidx.extend(range(start, end))
        positiveframes = feats[impidx]
        negativeframes = np.delete(feats, impidx, axis=0)
        positives.extend(positiveframes)
        negatives.extend(negativeframes)
    except Exception as exc:
        # Best effort: a file that cannot be processed is skipped, but the
        # reason is reported instead of being silently swallowed.
        print(f"Skipping {matfile}: {exc!r}")
        continue

positives = np.asarray(positives)
negatives = np.asarray(negatives)
data = np.concatenate((positives, negatives))
data = preprocessing.scale(data)
labels = [1] * len(positives) + [0] * len(negatives)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.25, random_state=123)
print("Data loaded!")

####  Task D-1) SVM

In [None]:
from sklearn import svm

#Create a svm regressor
# clf = svm.NuSVR(C=25.0, nu=0.5, kernel='rbf', max_iter=-1, verbose=1) 

#Train the model using the training sets
# clf.fit(X_train, y_train)

In [None]:
# Load the pickled model stored in best_svm.sav into clf (presumably the tuned
# SVM from the commented-out training cell - confirm).
# NOTE: unpickling can execute arbitrary code; only load model files from a trusted source.
with open('best_svm.sav', 'rb') as pickle_file:
    clf = pickle.load(pickle_file)

In [None]:
def avg(lst):
    """Return the arithmetic mean of the values in ``lst``."""
    total = sum(lst)
    return total / len(lst)


# Predicted score for every held-out frame.
res = clf.predict(X_test)

# Mean score over the frames whose true label is non-zero ...
nonzeroind = np.nonzero(y_test)[0]
real_ones = res[nonzeroind]
print(avg(real_ones))

# ... and over the frames whose true label is 0.
zeroind = [position for position, label in enumerate(y_test) if label == 0]
real_zeros = res[zeroind]
print(avg(real_zeros))


In [None]:
import matplotlib.pyplot as plt
# Histogram of the scores in real_zeros (predictions for the label-0 test frames).
fig = plt.figure()
fig.suptitle(
    'Distribution of the predicted prominence score of the negative frames - SVM',
    fontsize=20,
)
plt.hist(real_zeros, bins=np.arange(0, 1, 0.01));
plt.xlabel(xlabel='Prominence Score', fontsize=18)
plt.ylabel(ylabel='Number of Frames', fontsize=16)

In [None]:
import matplotlib.pyplot as plt
# Histogram of the scores in real_ones (predictions for the non-zero-label test frames).
fig = plt.figure()
fig.suptitle(
    'Distribution of the predicted prominence score of the positive frames - SVM',
    fontsize=20,
)
plt.hist(real_ones, bins=np.arange(0, 1, 0.01));
plt.xlabel(xlabel='Prominence Score', fontsize=18)
plt.ylabel(ylabel='Number of Frames', fontsize=16)

In [None]:
# Threshold the predicted scores at 0.5 to build the confusion matrix.
# A score of exactly 0.5 counts as a negative prediction, so every frame is
# counted exactly once (previously such frames fell into no bucket).
TP = int(np.sum(np.asarray(real_ones) > 0.5))
FP = int(np.sum(np.asarray(real_zeros) > 0.5))
TN = len(real_zeros) - FP
FN = len(real_ones) - TP

# Guard each ratio so a model with no positive predictions reports 0.0
# instead of raising ZeroDivisionError.
precision = TP / (TP + FP) if (TP + FP) else 0.0
recall = TP / (TP + FN) if (TP + FN) else 0.0
F1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
print(precision, recall, F1)

####  Task D-2) Neural Network

In [None]:
from sklearn.neural_network import MLPRegressor
# MLP regressor with hidden layers of 300, 200 and 200 units, tanh activations
# and the Adam solver, limited to 500 iterations.
clf = MLPRegressor(
    hidden_layer_sizes=(300, 200, 200),
    activation='tanh',
    solver='adam',
    max_iter=500,
    random_state=5,
    verbose=True,
)
# fit() returns the estimator itself, so clf ends up bound to the fitted model.
clf = clf.fit(X_train, y_train)

In [None]:
from scipy.stats import logistic


def avg(lst):
    """Return the arithmetic mean of the values in ``lst``."""
    total = sum(lst)
    return total / len(lst)


# Predicted score for every held-out frame.
res = clf.predict(X_test)

# Mean score over the frames whose true label is non-zero ...
nonzeroind = np.nonzero(y_test)[0]
real_ones = res[nonzeroind]
print(avg(real_ones))

# ... and over the frames whose true label is 0.
zeroind = [position for position, label in enumerate(y_test) if label == 0]
real_zeros = res[zeroind]
print(avg(real_zeros))

In [None]:
import matplotlib.pyplot as plt
# Histogram of the scores in real_zeros (predictions for the label-0 test frames).
fig = plt.figure()
fig.suptitle(
    'Distribution of the predicted prominence score of the negative frames - NN',
    fontsize=20,
)
plt.hist(real_zeros, bins=np.arange(0, 1, 0.01));
plt.xlabel(xlabel='Prominence Score', fontsize=18)
plt.ylabel(ylabel='Number of Frames', fontsize=16)

In [None]:
import matplotlib.pyplot as plt
# Histogram of the scores in real_ones (predictions for the non-zero-label test frames).
fig = plt.figure()
fig.suptitle(
    'Distribution of the predicted prominence score of the positive frames - NN',
    fontsize=20,
)
plt.hist(real_ones, bins=np.arange(0, 1, 0.01));
plt.xlabel(xlabel='Prominence Score', fontsize=18)
plt.ylabel(ylabel='Number of Frames', fontsize=16)

In [None]:
# Threshold the predicted scores at 0.5 to build the confusion matrix.
# A score of exactly 0.5 counts as a negative prediction, so every frame is
# counted exactly once (previously such frames fell into no bucket).
TP = int(np.sum(np.asarray(real_ones) > 0.5))
FP = int(np.sum(np.asarray(real_zeros) > 0.5))
TN = len(real_zeros) - FP
FN = len(real_ones) - TP

# Guard each ratio so a model with no positive predictions reports 0.0
# instead of raising ZeroDivisionError.
precision = TP / (TP + FP) if (TP + FP) else 0.0
recall = TP / (TP + FN) if (TP + FN) else 0.0
F1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
print(precision, recall, F1)

####  Task D-3) Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Random forest regressor with trees limited to depth 2 and a fixed seed.
rf = RandomForestRegressor(random_state=0, max_depth=2)
rf.fit(X=X_train, y=y_train)

In [None]:
def avg(lst):
    """Return the arithmetic mean of the values in ``lst``."""
    total = sum(lst)
    return total / len(lst)


# Predicted score for every held-out frame.
res = rf.predict(X_test)

# Mean score over the frames whose true label is non-zero ...
nonzeroind = np.nonzero(y_test)[0]
real_ones = res[nonzeroind]
print(avg(real_ones))

# ... and over the frames whose true label is 0.
zeroind = [position for position, label in enumerate(y_test) if label == 0]
real_zeros = res[zeroind]
print(avg(real_zeros))


In [None]:
plt.hist(real_zeros,np.arange(0,1,0.01));

In [None]:
plt.hist(real_ones,np.arange(0,1,0.01));

In [None]:

import seaborn as sns
import matplotlib.pyplot as plt
# Bar chart of the random forest's per-feature importances, indexed by feature position.
featimp = pd.Series(rf.feature_importances_)
ax = featimp.plot.bar(
    title="Acoustic Feature Importance",
    figsize=(15, 10),
    fontsize=12,
)
ax.set_xlabel("Feature Index", fontsize=20)
ax.set_ylabel("Feature importance", fontsize=20)


####  Task D-4) Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression


def avg(lst):
    """Return the arithmetic mean of the values in ``lst``."""
    total = sum(lst)
    return total / len(lst)


# Logistic regression with class 1 weighted 23 times as heavily as the default.
logistic_regression = LogisticRegression(class_weight={1: 23}, random_state=0)
logistic_regression = logistic_regression.fit(X_train, y_train)
predicted_values = logistic_regression.predict(X_test)

# Mean prediction over the frames whose true label is non-zero ...
nonzeroind = np.nonzero(y_test)[0]
real_ones = predicted_values[nonzeroind]
print(avg(real_ones))

# ... and over the frames whose true label is 0.
zeroind = [position for position, label in enumerate(y_test) if label == 0]
real_zeros = predicted_values[zeroind]
print(avg(real_zeros))

In [None]:
import matplotlib.pyplot as plt
# Histogram of the values in real_zeros (predictions for the label-0 test frames).
fig = plt.figure()
fig.suptitle(
    'Distribution of the predicted prominence score of the negative frames - Logistic',
    fontsize=20,
)
plt.hist(real_zeros, bins=np.arange(0, 1, 0.01));
plt.xlabel(xlabel='Prominence Score', fontsize=18)
plt.ylabel(ylabel='Number of Frames', fontsize=16)

In [None]:
import matplotlib.pyplot as plt
# Histogram of the logistic-regression predictions for the positive frames.
# The original cell was a copy-paste slip: it plotted real_zeros again and
# labelled the figure "NN" although it sits in the logistic-regression section.
fig = plt.figure()
plt.hist(real_ones, np.arange(0, 1, 0.01));
fig.suptitle('Distribution of the predicted prominence score of the positive frames - Logistic', fontsize=20)
plt.xlabel('Prominence Score', fontsize=18)
plt.ylabel('Number of Frames', fontsize=16)

In [None]:
# Threshold the predictions at 0.5 to build the confusion matrix.
# A value of exactly 0.5 counts as a negative prediction, so every frame is
# counted exactly once (previously such frames fell into no bucket).
TP = int(np.sum(np.asarray(real_ones) > 0.5))
FP = int(np.sum(np.asarray(real_zeros) > 0.5))
TN = len(real_zeros) - FP
FN = len(real_ones) - TP

# Guard each ratio so a model with no positive predictions reports 0.0
# instead of raising ZeroDivisionError.
precision = TP / (TP + FP) if (TP + FP) else 0.0
recall = TP / (TP + FN) if (TP + FN) else 0.0
F1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
print(precision, recall, F1)

#### Task D-6) Final Score Assignment

In [None]:
# Load the pickled model stored in best_svm.sav into clf; the scorer defined
# in the next cell reads this module-level name.
# NOTE: unpickling can execute arbitrary code; only load model files from a trusted source.
with open('best_svm.sav', 'rb') as pickle_file:
    clf = pickle.load(pickle_file)

In [None]:
def acoustic_prominence_scorer(featureleng, alignment, model=None):
    """Score every aligned word by the mean predicted prominence of its frames.

    Args:
        featureleng: feature matrix passed unchanged to ``predict``; one score
            per row is expected back (presumably one row per feature frame -
            confirm against the feature extraction).
        alignment: iterable of ``(word, start, duration)`` entries; entries
            whose word is ``<eps>`` are ignored.
        model: object with a ``predict`` method. Defaults to the module-level
            ``clf`` so existing two-argument calls behave as before.

    Returns:
        dict mapping each word to the mean of its per-occurrence scores.
        Occurrences whose frame window contains no scores are skipped.
    """
    scorer = clf if model is None else model
    raw_scores = scorer.predict(featureleng)
    scored_sequence = {}
    appearances = {}
    for entry in alignment:
        word = entry[0]
        if word == '<eps>':
            continue
        # Map the alignment start/length (divided by 10) onto score indexes.
        # Clamp at 0: a word starting at 0 would otherwise give start == -1
        # and an empty (wrapped-around) slice.
        start = max(0, math.ceil(entry[1] / 10) - 1)
        gap = math.floor(entry[2] / 10 - (entry[2] % 1)) + 1
        relevant_scores = raw_scores[start:start + gap]
        if len(relevant_scores) == 0:
            # Window lies outside the scored frames; nothing to average.
            continue
        mean_score = sum(relevant_scores) / len(relevant_scores)
        seen = appearances.get(word, 0)
        if seen:
            # Incremental mean over all occurrences so far.  The original
            # multiplied by the occurrence count instead of the previous mean.
            scored_sequence[word] = (seen * scored_sequence[word] + mean_score) / (seen + 1)
        else:
            scored_sequence[word] = mean_score
        appearances[word] = seen + 1
    return scored_sequence

def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of ``x``.

    Negative inputs are evaluated as ``e**x / (1 + e**x)`` so that large
    negative values return 0.0 instead of overflowing ``math.exp``.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

# Element-wise version of sigmoid for numpy arrays.
sigmoid_v = np.vectorize(sigmoid)

In [None]:
import ast
ALIGNMENTPATH = "alignments/"
FEATUREPATH = "./acoustic_feats_170520/"
exploretable = pd.read_pickle("./exploretable16052020.pkl")
# Use the row at position 4 of the table as the worked example.
_example = exploretable.iloc[4]
text = _example['tran']
textdesc = _example['desc']
chosenid = _example['id']
# literal_eval parses the saved list literal without executing code, and the
# with-block closes the file.
with open(f'{ALIGNMENTPATH}{chosenid}.txt', "r") as alignment_file:
    alignment = ast.literal_eval(alignment_file.read())
feats = scipy.io.loadmat(FEATUREPATH + chosenid)['ret']
# NOTE(review): all feature columns are kept here, whereas the training cell
# selects a subset (selected_indexes) - confirm the model expects this width.
feats = preprocessing.scale(feats)

In [None]:
data =  acoustic_prominence_scorer(feats,alignment)

### Task E) Audio Generation

####  Task E1) Mode 1 - Extractive Summarisation

In [None]:
import ast
ALIGNMENTPATH = "alignments/"
exploretable = pd.read_pickle("./exploretable16052020.pkl")
# Use the row at position 2 of the table as the worked example.
_example = exploretable.iloc[2]
text = _example['tran']
textdesc = _example['desc']
chosenid = _example['id']
# literal_eval parses the saved list literal without executing code, and the
# with-block closes the file.
with open(f'{ALIGNMENTPATH}{chosenid}.txt', "r") as alignment_file:
    alignment = ast.literal_eval(alignment_file.read())
# Extractive summary of the transcription, requested with word_count=40.
summary = extractive_sum(text, word_count=40)
# print(summary)
# print(text)

In [None]:
def find_start_indexes(alignment,summary):
    """Return the positions in ``alignment`` whose word equals the first word of ``summary``."""
    return [
        position
        for position, entry in enumerate(alignment)
        if summary[0] == entry[0]
    ]

def find_end_indexes(alignment,summary):
    """Return the alignment positions of the last summary word that can be found.

    Walks ``summary`` from its end towards its start, skipping tokens that are
    punctuation (or empty), and returns every position in ``alignment`` whose
    word equals the first token that occurs there.

    Args:
        alignment: sequence of entries whose first element is the word.
        summary: sequence of summary tokens.

    Returns:
        List of matching positions, or ``[]`` when no token of ``summary``
        occurs in ``alignment``.
    """
    remaining = list(summary)
    while remaining:
        # Fall back one word at a time.  The original reversed the list on
        # every retry, so it alternated between words from both ends.
        lastword = remaining.pop()
        if lastword in string.punctuation:
            continue
        # Direct comparison instead of a regex over str(entry): the regex
        # needed the never-imported `re`, was not escaped, and could not match
        # words containing an apostrophe.
        indexes = [position for position, entry in enumerate(alignment) if entry[0] == lastword]
        if indexes:
            return indexes
    return []

start_indexes = find_start_indexes(alignment,summary.split(" "))      
end_indexes = find_end_indexes(alignment,summary.split(" "))      

In [None]:
# Search every (start, end) pair for the alignment span whose text is closest
# to the extractive summary, measured by Levenshtein distance.
editdistance = 99999
mini = minj = None  # stay None when no valid span exists
for i in start_indexes:
    for j in end_indexes:
        if j < i:
            # An end before the start gives an empty span, never a usable clip.
            continue
        candidate = alignment[i:j+1]
        candidate = [x[0] for x in candidate if x[0] != '<eps>']
        candidate = " ".join(candidate)
        candidatedistance = Levenshtein.distance(candidate, summary)
        if candidatedistance < editdistance:
            editdistance = candidatedistance
            mini = i
            minj = j

In [None]:
# Clip boundaries: the span's start, and the end of its last word (start plus
# length), each multiplied by 10 before slicing the audio.
audiostart = 10 * alignment[mini][1]
audioend = 10 * (alignment[minj][1] + alignment[minj][2])
originalutterance = AudioSegment.from_wav(f'./speech_audios/{chosenid}.wav')
play(originalutterance[audiostart:audioend])
print(audiostart, audioend, sep="\n")

####  Task E2) Mode 2 - Abstractive Summarisation

In [None]:
# from transformers import pipeline
import pandas as pd
# abstractive_sum = pipeline(task="summarization")
exploretable = pd.read_pickle("./exploretable16052020.pkl")
text = exploretable.iloc[0]['tran']
# summary = abstractive_sum(
#     text,
#     max_length=80
# )

print(text)
# print(" ")
# print(summary[0]['summary_text'])

In [None]:
exploretable
desc_list = exploretable['desc'].tolist()
tran_list = exploretable['tran'].tolist()

In [None]:
from sklearn.model_selection import train_test_split
tran_train, tran_val, desc_train, desc_val= train_test_split(
    tran_list, desc_list, test_size=0.25, random_state=42)

tran_dev, tran_test, desc_dev, desc_test= train_test_split(
    tran_val, desc_val, test_size=0.5, random_state=42)

In [None]:
desc_tran_train = list(zip(tran_train,desc_train))
desc_tran_dev = list(zip(tran_dev,desc_dev))
desc_tran_test = list(zip(tran_test,desc_test))

In [None]:
desc_tran_test[0]


In [None]:
# Write the training pairs as tab-separated lines: first element, tab, second
# element.  The with-block closes the file even if a write fails.
with open('train.tsv', 'w') as f:
    f.writelines(x[0] + "\t" + x[1] + "\n" for x in desc_tran_train)
print("done.")

####  Task E3) Mode 3 - Hybrid Approach

In [None]:
import nlpete.training.metrics
import nlpete.data.dataset_readers
from nlpete.models.copynet import CopyNet
from allennlp.data.fields.text_field import TextFieldTensors
from overrides import overrides
from allennlp.models.archival import load_archive
from allennlp.models.model import Model
from allennlp.data import DatasetReader
from allennlp.common.util import JsonDict
from allennlp.data import Instance
from nlpete.data.dataset_readers import (
    CopyNetDatasetReader,
) 
from allennlp.predictors import Predictor
import warnings
class CopyNetPredictor(Predictor):
    """
    Predictor for the CopyNet model that forwards acoustic data with the source text.
    """

    def __init__(self, model: Model, dataset_reader: DatasetReader) -> None:
        super().__init__(model, dataset_reader)
        message = (
            "The 'copynet' predictor has been deprecated in favor of "
            "the 'seq2seq' predictor."
        )
        warnings.warn(message, DeprecationWarning)

    def predict(self, source: str,acoustic_data: str) -> JsonDict:
        """Run prediction on a source string plus its acoustic data."""
        payload = {"source_string": source, "acoustic_data": acoustic_data}
        return self.predict_json(payload)

    @overrides
    def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """Build an instance from the "source_string" and "acoustic_data" entries."""
        return self._dataset_reader.text_to_instance(
            json_dict["source_string"],
            json_dict["acoustic_data"],
        )

In [None]:
exploretable = pd.read_pickle("./exploretable16052020.pkl")
# input_string = tran = exploretable.iloc[15]['tran']
input_string = "after you 've done at least six to twelve rounds of sun salutation in sivananda yoga we start on the main practice . we will begin with leg raises . it really helps open up the core , strengthen the core , and it 's just a wonderful way to start . so i 'm going to have lauren here lay down on her back . beautiful . now bring your arms down to your sides , palms on the earth . if you need any little assistance in this , because it is a lot of core work through here , you can place the hands underneath the hips if you need just a little bit of assistance . when i ask lauren here to engage the feet , nice deep flexion of the feet . pressing through the heels , toes to the nose . and we 're going to start with single leg raises , and we 're going to have you do three of each , on each side . starting very slowly , inhaling up the right leg . about five seconds , slowly coming up . exhaling five , four , three , two , one . inhaling up right , five , four , three , two , one . lowering down slowly , five , four , three , two , one"
# Restore the model archive and build a CopyNetPredictor from it.
archive = load_archive('./absummodel3.tar.gz')
predictor = CopyNetPredictor.from_archive(archive)
# NOTE(review): `acoustic_data` is not assigned in any cell visible in this
# notebook - confirm where it is defined before running this cell.
results = predictor.predict(input_string,acoustic_data)

In [None]:

for x in results['predicted_tokens']:
    print(" ".join(x))


### ROUGE Plots

In [None]:
# Loading Examples
from rouge import Rouge
rouge = Rouge()
exploretable = pd.read_pickle("./exploretable16052020.pkl")
# NOTE(review): the predictor is not used by the extractive loop below; it is
# still loaded so that this cell's side effects are unchanged.
archive = load_archive('./absummodel3.tar.gz')
predictor = CopyNetPredictor.from_archive(archive)


# Score the 40-word extractive summary of each transcription against the
# human-made description.
rouge_dict = {}
# NOTE(review): the loop starts at row 1, so row 0 is never scored - confirm
# this is intended.  The upper bound is capped at the table length so a table
# with fewer than 500 rows no longer raises IndexError.
for i in range(1, min(500, len(exploretable))):
    print(i)
    chosenid = exploretable.iloc[i]['id']
    tran = exploretable.iloc[i]['tran']
    desc = exploretable.iloc[i]['desc']

    summary = extractive_sum(tran, word_count=40)
    print(summary)
    try:
        rouge_scores = rouge.get_scores(summary, desc)
    except Exception:
        # Record nothing for this video: storing the previous iteration's
        # scores under this id would silently corrupt the results.
        print(f"{i} failed")
        continue
    rouge_dict[chosenid] = rouge_scores

print(rouge_dict)


In [None]:
from pandas.io.json import json_normalize
def _metric_columns(per_metric, metric):
    """Expand one ROUGE metric's dict column into ``<metric>-f/-p/-r`` columns."""
    expanded = per_metric[metric].apply(pd.Series)
    renamed = {stat: f"{metric}-{stat}" for stat in ("f", "p", "r")}
    return expanded.rename(columns=renamed)


# One row per video id; column "A" holds that video's ROUGE result dict.
temp_df = pd.DataFrame.from_dict(rouge_dict, orient='index', columns=['A'])
# Spread the dict into one column per metric, then each metric into f/p/r columns.
df_pol_ps = temp_df['A'].apply(pd.Series)
df_pol_1 = _metric_columns(df_pol_ps, 'rouge-1')
df_pol_2 = _metric_columns(df_pol_ps, 'rouge-2')
df_pol_l = _metric_columns(df_pol_ps, 'rouge-l')
rouge_df = df_pol_1.join(df_pol_2).join(df_pol_l)
rouge_df.to_pickle("./rouge_df_mode1.pkl")

In [None]:
rouge_df.head()

In [None]:
rouge_df.sort_values(['rouge-l-f'], ascending=False)

In [None]:
rouge_df.mean(axis = 0).plot.bar(x='lab', y='val',color=['blue', 'red', 'green']*3)

In [None]:
import spacy

In [None]:
input_string = "after you 've done at least six to twelve rounds of sun salutation in sivananda yoga we start on the main practice . we will begin with leg raises . it really helps open up the core , strengthen the core , and it 's just a wonderful way to start . so i 'm going to have lauren here lay down on her back . beautiful . now bring your arms down to your sides , palms on the earth . if you need any little assistance in this , because it is a lot of core work through here , you can place the hands underneath the hips if you need just a little bit of assistance . when i ask lauren here to engage the feet , nice deep flexion of the feet . pressing through the heels , toes to the nose . and we 're going to start with single leg raises , and we 're going to have you do three of each , on each side . starting very slowly , inhaling up the right leg . about five seconds , slowly coming up . exhaling five , four , three , two , one . inhaling up right , five , four , three , two , one . lowering down slowly , five , four , three , two , one"
# Restore the model archive and build a CopyNetPredictor from it.
archive = load_archive('./absummodel3.tar.gz')
predictor = CopyNetPredictor.from_archive(archive)
# [1,1,1] is passed as the second (acoustic data) argument - presumably a
# placeholder rather than real acoustic scores; TODO confirm.
results = predictor.predict(input_string,[1,1,1])

In [None]:
for sentence in results['predicted_tokens']:
    print(" ".join(sentence))

In [None]:
import pickle
import seaborn as sns
# One sns.set call: a second sns.set(rc=...) would re-apply seaborn's default
# style and discard "whitegrid".
sns.set(style="whitegrid", rc={'figure.figsize':(11.7,8.27),'font.size': 32, 'axes.labelsize': 16,
    'axes.titlesize': 24, 'xtick.labelsize': 16, 'ytick.labelsize': 16})
colors = ['tab:blue','tab:orange','tab:green','tab:red','tab:purple']
# Load the per-video Mode 1 ROUGE table (the with-block closes the file) and
# average every ROUGE column.
with open("rouge_df_mode1.pkl", "rb") as _pickle_file:
    mode_1_df = pickle.load(_pickle_file)
mode_1_data = mode_1_df.mean(axis = 0)
# x/y are passed by keyword: recent seaborn releases reject them positionally.
ax = sns.barplot(x=mode_1_data.index, y=mode_1_data.values)
ax.set( title = 'Mode 1 ROUGE Scores', xlabel = 'Summarisation Mode', ylabel = 'ROUGE Score' )


In [None]:
import pickle
# Load the per-video Mode 2 ROUGE table (the with-block closes the file) and
# average every ROUGE column.
with open("rouge_df_mode2.pkl", "rb") as _pickle_file:
    mode_2_df = pickle.load(_pickle_file)
mode_2_data = mode_2_df.mean(axis = 0)


# One sns.set call: a second sns.set(rc=...) would re-apply seaborn's default
# style and discard "whitegrid".
sns.set(style="whitegrid", rc={'figure.figsize':(11.7,8.27),'font.size': 32, 'axes.labelsize': 16,
    'axes.titlesize': 24, 'xtick.labelsize': 16, 'ytick.labelsize': 16})
colors = ['tab:blue','tab:orange','tab:green','tab:red','tab:purple']

# x/y are passed by keyword: recent seaborn releases reject them positionally.
ax = sns.barplot(x=mode_2_data.index, y=mode_2_data.values)
ax.set( title = 'Mode 2 ROUGE Scores', xlabel = 'Summarisation Mode', ylabel = 'ROUGE Score' )


In [None]:
import pickle
# Load the per-video Mode 3 ROUGE table (the with-block closes the file) and
# average every ROUGE column.
with open("rouge_df_model3.pkl", "rb") as _pickle_file:
    mode_3_df = pickle.load(_pickle_file)
mode_3_data = mode_3_df.mean(axis = 0)


# One sns.set call: a second sns.set(rc=...) would re-apply seaborn's default
# style and discard "whitegrid".
sns.set(style="whitegrid", rc={'figure.figsize':(11.7,8.27),'font.size': 32, 'axes.labelsize': 16,
    'axes.titlesize': 24, 'xtick.labelsize': 16, 'ytick.labelsize': 16})
colors = ['tab:blue','tab:orange','tab:green','tab:red','tab:purple']

# x/y are passed by keyword: recent seaborn releases reject them positionally.
ax = sns.barplot(x=mode_3_data.index, y=mode_3_data.values)
ax.set( title = 'Mode 3 ROUGE Scores', xlabel = 'Summarisation Mode', ylabel = 'ROUGE Score' )


In [None]:
import pickle

# Difference between the mean Mode 3 and Mode 2 scores, scaled by 100.
threeveetwo = (mode_3_data - mode_2_data)*100

# One sns.set call: a second sns.set(rc=...) would re-apply seaborn's default
# style and discard "whitegrid".
sns.set(style="whitegrid", rc={'figure.figsize':(11.7,8.27),'font.size': 32, 'axes.labelsize': 16,
    'axes.titlesize': 24, 'xtick.labelsize': 16, 'ytick.labelsize': 16})
colors = ['tab:blue','tab:orange','tab:green','tab:red','tab:purple']

# x/y are passed by keyword: recent seaborn releases reject them positionally.
ax = sns.barplot(x=threeveetwo.index, y=threeveetwo.values)
# The plotted value is an absolute difference times 100, i.e. percentage
# points, not a relative percentage change, so the axis label says so.
ax.set( title = 'Mode 3 vs Mode 2 ROUGE Scores', xlabel = 'Summarisation Mode', ylabel = 'ROUGE Score Difference (percentage points)' )


In [None]:
from rouge import Rouge
rouge = Rouge()
# For each video, keep the ROUGE scores of the Mode 3 candidate summary with
# the highest ROUGE-1 F score against the human-made description.
rouge_dict = {}
# NOTE(review): `mode_3_summary` is not defined in any cell visible here -
# confirm it is defined before running this cell.
# NOTE(review): the loop starts at row 1, so row 0 is never scored - confirm
# this is intended.  The upper bound is capped at the table length.
for i in range(1, min(500, len(exploretable))):
    print(i)
    youtube_id = exploretable.iloc[i]['id']
    print(youtube_id)
    tran = exploretable.iloc[i]['tran']
    desc = exploretable.iloc[i]['desc']
    try:
        sentences = mode_3_summary(youtube_id, tran, desc)
    except Exception as exc:
        # Best effort: skip videos that cannot be summarised, but say why.
        print(f"{youtube_id} skipped: {exc!r}")
        continue
    maximum_onef = 0
    maximum_rouge = None
    for candidate in sentences:
        try:
            rouge_scores = rouge.get_scores(candidate, desc)
        except Exception:
            # Skip this candidate instead of reusing the previous scores.
            print(f"{i} failed")
            continue
        one_f = rouge_scores[0]['rouge-1']['f']
        # Track the running best; the original never updated maximum_onef, so
        # the last candidate with a non-zero score won instead of the best.
        if maximum_rouge is None or one_f > maximum_onef:
            maximum_onef = one_f
            maximum_rouge = rouge_scores
    if maximum_rouge is not None:
        # Only record videos with at least one scored candidate, so that no
        # result from a previous video is stored under this id.
        rouge_dict[youtube_id] = maximum_rouge
    

In [None]:
def _metric_columns(per_metric, metric):
    """Expand one ROUGE metric's dict column into ``<metric>-f/-p/-r`` columns."""
    expanded = per_metric[metric].apply(pd.Series)
    renamed = {stat: f"{metric}-{stat}" for stat in ("f", "p", "r")}
    return expanded.rename(columns=renamed)


# One row per video id; column "A" holds that video's ROUGE result dict.
temp_df = pd.DataFrame.from_dict(rouge_dict, orient='index', columns=['A'])
# Spread the dict into one column per metric, then each metric into f/p/r columns.
df_pol_ps = temp_df['A'].apply(pd.Series)
df_pol_1 = _metric_columns(df_pol_ps, 'rouge-1')
df_pol_2 = _metric_columns(df_pol_ps, 'rouge-2')
df_pol_l = _metric_columns(df_pol_ps, 'rouge-l')
rouge_df = df_pol_1.join(df_pol_2).join(df_pol_l)
rouge_df.to_pickle("./rouge_df_model3.pkl")

#### Compilation

In [None]:
# Load the pickled model (the with-block closes the file) and print its
# prediction for every test frame.
# NOTE: unpickling can execute arbitrary code; only load model files from a trusted source.
with open("best_svm.sav", "rb") as _pickle_file:
    best_svm = pickle.load(_pickle_file)
for i in best_svm.predict(X_test):
    print(i)

In [None]:
res = clf.predict(X_test)
def avg(lst):
    """Return the arithmetic mean of ``lst``: its sum divided by its length."""
    total = sum(lst)
    count = len(lst)
    return total / count

# Compare the predictions in `res` against the true labels in y_test:
# print the mean prediction over samples whose true label is non-zero,
# then the mean prediction over samples whose true label is zero.
nonzeroind = np.nonzero(y_test)[0]
zeroind = []
for idx, val in enumerate(y_test):
    if val == 0:
        zeroind.append(idx)

real_ones = res[nonzeroind]
real_zeros = res[zeroind]

print(avg(real_ones))
print(avg(real_zeros))

### Scratch area


In [None]:
# Figure 1 with two stacked panels built from `sig`: a line plot on top and
# a spectrogram below. The figure is written to waveform.png.
# NOTE(review): the x label hard-codes "8kHz" while the spectrogram uses
# `sample_rate` — confirm the two agree.
plt.figure(1)
plot_a = plt.subplot(211)
plot_b = plt.subplot(212)

plot_a.plot(sig)
plot_a.set(xlabel='Sample number (8kHz sample rate)', ylabel='energy')

plot_b.specgram(sig, NFFT=1024, Fs=sample_rate, noverlap=900)
plot_b.set(xlabel='Time', ylabel='Frequency')

plt.savefig('waveform.png')

In [None]:
import pandas as pd
# Reload exploretable from its pickle snapshot and keep its first row
# (position 0) in `row` for inspection.
exploretable = pd.read_pickle("./exploretable16052020.pkl")
row = exploretable.iloc[0]

In [None]:
# Echo `row` as the cell output.
row

In [None]:
import nlpete.training.metrics
import nlpete.data.dataset_readers
from nlpete.models import copynet
from allennlp.data.fields.text_field import TextFieldTensors
from overrides import overrides
from allennlp.models.archival import load_archive
from allennlp.models.model import Model
from allennlp.data import DatasetReader
from allennlp.common.util import JsonDict
from allennlp.data import Instance
from nlpete.data.dataset_readers import (
    CopyNetDatasetReader,
) 
from allennlp.predictors import Predictor
import warnings
class CopyNetPredictor(Predictor):
    """
    Predictor for the CopyNet model.

    Accepts a raw source string, wraps it in the JSON payload expected by
    ``predict_json`` and lets the dataset reader turn it into an instance.
    Instantiating it emits a ``DeprecationWarning``.
    """

    def __init__(self, model: Model, dataset_reader: DatasetReader) -> None:
        super().__init__(model, dataset_reader)
        deprecation_note = (
            "The 'copynet' predictor has been deprecated in favor of "
            "the 'seq2seq' predictor."
        )
        warnings.warn(deprecation_note, DeprecationWarning)

    def predict(self, source: str) -> JsonDict:
        """Run the model on ``source`` and return the JSON prediction."""
        payload = {"source_string": source}
        return self.predict_json(payload)

    @overrides
    def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """Build an instance from the ``"source_string"`` entry of ``json_dict``."""
        return self._dataset_reader.text_to_instance(json_dict["source_string"])

In [None]:
# Load the model archive, build a CopyNetPredictor from it and run a single
# sample query through it (the result is echoed as the cell output).
archive = load_archive('./model19.tar.gz')
predictor = CopyNetPredictor.from_archive(archive)
sample_query = {"source_string":"find all wav in this directory"}
predictor.predict_json(sample_query)

In [None]:
# Load help_dayan.csv into `subj`.
# NOTE(review): presumably the subjective (MOS) ratings sliced by the
# *_class column lists below — confirm.
subj = pd.read_csv('help_dayan.csv')

In [None]:
# Echo `subj` as the cell output.
subj

In [None]:
# Column positions belonging to class A; classes B to E use the same layout
# shifted right by 3, 6, 9 and 12 columns respectively.
a_class = [1, 2, 3, 16, 17, 18, 31, 32, 33, 46, 47, 48]
b_class, c_class, d_class, e_class = (
    [column + 3 * step for column in a_class] for step in range(1, 5)
)

def merge_on_sample(input_series):
    """Average the twelve entries of ``input_series`` per rating criterion.

    Entries 0/3/6/9 are averaged into ``"informativeness"``, 1/4/7/10 into
    ``"understanding"`` and 2/5/8/11 into ``"quality"``. ``input_series`` is
    indexed with a list and must support ``.mean(axis=0)`` on the result.

    Returns a dict with those three keys, in that order.
    """
    criterion_slots = {
        "informativeness": [0, 3, 6, 9],
        "understanding": [1, 4, 7, 10],
        "quality": [2, 5, 8, 11],
    }
    return {
        criterion: input_series[slots].mean(axis=0)
        for criterion, slots in criterion_slots.items()
    }

def extract_informativeness(input_df):
    """Return the values of columns 0, 3, 6 and 9 of ``input_df`` as a flat array.

    The columns are picked by position from ``input_df.columns``. Any row with
    a missing value in one of them is dropped entirely before flattening
    (row by row).
    """
    wanted = input_df.columns[[0, 3, 6, 9]]
    complete_rows = input_df[wanted].dropna()
    return complete_rows.values.flatten()

def extract_understanding(input_df):
    """Return the values of columns 1, 4, 7 and 10 of ``input_df`` as a flat array.

    The columns are picked by position from ``input_df.columns``. Any row with
    a missing value in one of them is dropped entirely before flattening
    (row by row).
    """
    wanted = input_df.columns[[1, 4, 7, 10]]
    complete_rows = input_df[wanted].dropna()
    return complete_rows.values.flatten()

def extract_quality(input_df):
    """Return the values of columns 2, 5, 8 and 11 of ``input_df`` as a flat array.

    The columns are picked by position from ``input_df.columns``. Any row with
    a missing value in one of them is dropped entirely before flattening
    (row by row).
    """
    wanted = input_df.columns[[2, 5, 8, 11]]
    complete_rows = input_df[wanted].dropna()
    return complete_rows.values.flatten()

In [None]:
# Ratings for class A: select its columns from `subj` and pull out the flat
# array of ratings for each criterion.
a_class_df = subj[subj.columns[a_class]]
# Keep the raw rating arrays rather than their means. Classes B to E store
# arrays, and the box plots later in the notebook take all five classes
# together, so a scalar here would collapse class A to a single point.
a_class_informativeness = extract_informativeness(a_class_df)
a_class_understanding = extract_understanding(a_class_df)
a_class_quality = extract_quality(a_class_df)
a_class_quality

In [None]:
# Ratings for class B: select its columns from `subj`, then extract one flat
# array per criterion (informativeness, understanding, quality).
b_class_df = subj[subj.columns[b_class]]
b_class_informativeness, b_class_understanding, b_class_quality = (
    extractor(b_class_df)
    for extractor in (extract_informativeness, extract_understanding, extract_quality)
)


In [None]:
# Ratings for class C: select its columns from `subj`, then extract one flat
# array per criterion (informativeness, understanding, quality).
c_class_df = subj[subj.columns[c_class]]
c_class_informativeness, c_class_understanding, c_class_quality = (
    extractor(c_class_df)
    for extractor in (extract_informativeness, extract_understanding, extract_quality)
)


In [None]:
# Ratings for class D: select its columns from `subj`, then extract one flat
# array per criterion (informativeness, understanding, quality).
d_class_df = subj[subj.columns[d_class]]
d_class_informativeness, d_class_understanding, d_class_quality = (
    extractor(d_class_df)
    for extractor in (extract_informativeness, extract_understanding, extract_quality)
)


In [None]:
# Ratings for class E: select its columns from `subj`, then extract one flat
# array per criterion (informativeness, understanding, quality).
e_class_df = subj[subj.columns[e_class]]
e_class_informativeness, e_class_understanding, e_class_quality = (
    extractor(e_class_df)
    for extractor in (extract_informativeness, extract_understanding, extract_quality)
)

In [None]:
import scipy.stats as stats

In [None]:

# One-way ANOVA over the quality ratings of all five classes; the result is
# echoed as the cell output.
quality_samples = (
    a_class_quality,
    b_class_quality,
    c_class_quality,
    d_class_quality,
    e_class_quality,
)
stats.f_oneway(*quality_samples)

In [None]:
# Pairwise one-way ANOVA between classes B and C. Only the quality comparison
# is active; swap in one of the commented lines to test another criterion.
# stats.f_oneway(b_class_informativeness, c_class_informativeness)
# stats.f_oneway(b_class_understanding, c_class_understanding)
stats.f_oneway(b_class_quality, c_class_quality)

In [None]:
import seaborn as sns
# Box plot of the informativeness ratings, one box per class (A to E), each
# box drawn in its own colour.
sns.set(style="whitegrid")

data = [
    a_class_informativeness,
    b_class_informativeness,
    c_class_informativeness,
    d_class_informativeness,
    e_class_informativeness,
]
colors = ['tab:blue','tab:orange','tab:green','tab:red','tab:purple']

fig, ax = plt.subplots()
fig.set_size_inches(18.5, 10.5)
fig.suptitle('Informativeness MOS scores', fontsize=30)
ax.set_xlabel('Summarisation Mode',fontsize=20)
ax.set_ylabel('MOS Score',fontsize=20)

box_dict = ax.boxplot(data, patch_artist=True,  showmeans=True)
# These element kinds have one artist per box: colour them in box order.
for part in ('boxes', 'fliers', 'medians', 'means'):
    for artist, shade in zip(box_dict[part], colors):
        plt.setp(artist, color=shade)
# Whiskers and caps come two per box, so pair up consecutive artists
# (even/odd positions) and give each pair its box's colour.
for part in ('whiskers', 'caps'):
    evens = box_dict[part][::2]
    odds = box_dict[part][1::2]
    for pair, shade in zip(zip(evens, odds), colors):
        plt.setp(pair, color=shade)

In [None]:
import seaborn as sns
# Box plot of the understanding ratings, one box per class (A to E), each
# box drawn in its own colour.
sns.set(style="whitegrid")

data = [
    a_class_understanding,
    b_class_understanding,
    c_class_understanding,
    d_class_understanding,
    e_class_understanding,
]
colors = ['tab:blue','tab:orange','tab:green','tab:red','tab:purple']

fig, ax = plt.subplots()
fig.set_size_inches(18.5, 10.5)
fig.suptitle('Understanding MOS scores', fontsize=30)
ax.set_xlabel('Summarisation Mode',fontsize=20)
ax.set_ylabel('MOS Score',fontsize=20)

box_dict = ax.boxplot(data, patch_artist=True,  showmeans=True)
# These element kinds have one artist per box: colour them in box order.
for part in ('boxes', 'fliers', 'medians', 'means'):
    for artist, shade in zip(box_dict[part], colors):
        plt.setp(artist, color=shade)
# Whiskers and caps come two per box, so pair up consecutive artists
# (even/odd positions) and give each pair its box's colour.
for part in ('whiskers', 'caps'):
    evens = box_dict[part][::2]
    odds = box_dict[part][1::2]
    for pair, shade in zip(zip(evens, odds), colors):
        plt.setp(pair, color=shade)

In [None]:
import seaborn as sns
# Box plot of the quality ratings, one box per class (A to E), each box
# drawn in its own colour.
sns.set(style="whitegrid")

data = [
    a_class_quality,
    b_class_quality,
    c_class_quality,
    d_class_quality,
    e_class_quality,
]
colors = ['tab:blue','tab:orange','tab:green','tab:red','tab:purple']

fig, ax = plt.subplots()
fig.set_size_inches(18.5, 10.5)
fig.suptitle('Quality MOS scores', fontsize=30)
ax.set_xlabel('Summarisation Mode',fontsize=20)
ax.set_ylabel('MOS Score',fontsize=20)

box_dict = ax.boxplot(data, patch_artist=True,  showmeans=True)
# These element kinds have one artist per box: colour them in box order.
for part in ('boxes', 'fliers', 'medians', 'means'):
    for artist, shade in zip(box_dict[part], colors):
        plt.setp(artist, color=shade)
# Whiskers and caps come two per box, so pair up consecutive artists
# (even/odd positions) and give each pair its box's colour.
for part in ('whiskers', 'caps'):
    evens = box_dict[part][::2]
    odds = box_dict[part][1::2]
    for pair, shade in zip(zip(evens, odds), colors):
        plt.setp(pair, color=shade)