# 6.8630 Final Project

In [125]:
import numpy as np
import matplotlib.pyplot as plt
import nltk

from tools import PcfgGrammar, PcfgGenerator, flatten_tree
from tools.grammar import DynamicGrammar
from nltk import Nonterminal
from nltk.grammar import ProbabilisticProduction
import random


In [126]:
# Load the main grammar from the S1 rule file and the S1 vocabulary file,
# passing the allowed-words list to the reader.
main_grammar = PcfgGrammar.read_grammar(
    grammar_files=[
        './main-data/grammar/S1.gr',
        './main-data/grammar/S1_Vocab.gr',
    ],
    allowed_words_file='./main-data/allowed_words.txt',
)

### Helper Functions

In [127]:
def parse_sentence(grammar, sentence, verbose=False):
    """Return the most probable parse of a whitespace-tokenised sentence.

    Args:
        grammar: Object exposing ``get_most_probable_parse(tokens)``.
        sentence: Sentence string; it is split on whitespace into tokens.
        verbose: When true, print a message if no tree is found or if the
            grammar raises ``ValueError``.

    Returns:
        Whatever ``get_most_probable_parse`` returns (possibly ``None``), or
        ``None`` when the grammar raised ``ValueError``.
    """
    words = sentence.split()
    try:
        best_tree = grammar.get_most_probable_parse(words)
    except ValueError as err:
        # The grammar rejected the input; report only on request.
        if verbose:
            print(f'cannot parse `{sentence}`')
            print(err)
        return None

    if verbose and best_tree is None:
        print(f'no parse tree found for `{sentence}`')
    return best_tree


def parse_many_sentences(grammar, sentences, verbose=False):
    """Parse every sentence in order and collect the results.

    Returns a list with one entry per input sentence, each being the value
    returned by ``parse_sentence`` (``None`` where parsing failed).
    """
    return [parse_sentence(grammar, text, verbose) for text in sentences]


def read_file_lines(text_file):
    """Read a text file and return its stripped, non-comment lines.

    Every line is stripped of surrounding whitespace; lines that then start
    with ``#`` are treated as comments and dropped. Blank lines are kept as
    empty strings.
    """
    with open(text_file, 'r') as handle:
        stripped = (raw.strip() for raw in handle)
        return [entry for entry in stripped if not entry.startswith('#')]


def parse_file(grammar, text_file, verbose=False):
    """Parse every non-comment line of ``text_file`` with ``grammar``.

    Lines are loaded with ``read_file_lines`` and parsed with
    ``parse_many_sentences``; the resulting list is returned unchanged.
    """
    return parse_many_sentences(grammar, read_file_lines(text_file), verbose)


def cross_entropy_file(grammar, text_file, verbose=False):
    """Return the grammar's cross entropy over the sentences in ``text_file``.

    The file is read with ``read_file_lines`` and the lines are handed to
    ``grammar.get_cross_entropy_over_sentences`` together with ``verbose``.
    """
    sentences = read_file_lines(text_file)
    return grammar.get_cross_entropy_over_sentences(sentences, verbose)

def generate_random_sentences(grammar, num_sentences=5, random_seed=None):
    """Sample sentences from ``grammar`` and return them as strings.

    Args:
        grammar: Grammar handed to ``PcfgGenerator``.
        num_sentences: Number of sentences to generate.
        random_seed: Seed for the generator; when ``None`` a seed is drawn
            with ``random.randint(0, 100)``.

    Returns:
        A list of ``num_sentences`` strings, each the space-joined tokens of
        one generated tree.
    """
    seed = random.randint(0, 100) if random_seed is None else random_seed
    generator = PcfgGenerator(grammar=grammar, random_seed=seed)

    # Generate all trees first, then flatten them into token strings.
    trees = []
    for _ in range(num_sentences):
        trees.append(generator.generate())
    return [" ".join(flatten_tree(sampled)) for sampled in trees]

def combine_grammars(s1_grammar, s2_grammar, prob_start_to_s1=0.99):
    """Merge two grammars and set the START -> S1 / START -> S2 weights.

    The productions of both grammars are concatenated into a
    ``DynamicGrammar``. ``START -> S1`` is given weight ``prob_start_to_s1``
    and ``START -> S2`` the complement, then the result is returned via
    ``get_pcfg_grammar()``.
    """
    all_productions = s1_grammar.productions() + s2_grammar.productions()
    merged = DynamicGrammar(all_productions)

    branch_weights = (
        ("S1", prob_start_to_s1),
        ("S2", 1 - prob_start_to_s1),
    )
    for symbol, branch_weight in branch_weights:
        merged.update_weight(
            lhs=Nonterminal("START"),
            rhs=(Nonterminal(symbol),),
            weight=branch_weight,
        )
    return merged.get_pcfg_grammar()

In [128]:
def export_grammars(grammar, name):
    """Export ``grammar`` to rule and vocab files under ./main-data/grammar/.

    The ``START -> SX`` rule weight is set to 1, then the grammar is written
    to ``<name>.gr`` (rules) and ``<name>-vocab.gr`` (vocabulary).
    """
    grammar_dir = './main-data/grammar/'
    editable = DynamicGrammar(grammar.productions())

    # TODO(review): the original author was unsure whether this update works.
    editable.update_weight(
        lhs=Nonterminal("START"),
        rhs=(Nonterminal("SX"),),
        weight=1,
    )

    # TODO(review): open question from the original author - can the vocab
    # file be dropped?
    editable.export_to_file(
        vocab_filepath=grammar_dir + name + '-vocab.gr',
        rules_filepath=grammar_dir + name + '.gr',
    )

def song_part_grammar(type):
    """Induce a PCFG for one song part and export it to disk.

    Reads ``./main-data/song-parts/<type>.sen`` line by line, keeps every
    line that ``main_grammar`` can parse, and collects the productions of
    each line's most probable parse. A PCFG is induced from those productions
    with ``nltk.induce_pcfg`` and exported through ``export_grammars`` under
    the name ``type``.

    Args:
        type: Song-part name (e.g. ``'intro'``), used as both the ``.sen``
            file stem and the exported grammar name. The name shadows the
            builtin but is kept so existing callers keep working.
    """
    train_productions = []
    # Context manager so the file handle is closed even if parsing raises
    # (the original left the handle open).
    with open('./main-data/song-parts/' + type + '.sen', 'r') as sentence_file:
        for sentence in sentence_file:
            tokens = sentence.strip().split()
            # Only lines the main grammar can parse contribute productions.
            if main_grammar.can_parse(tokens):
                tree = main_grammar.get_most_probable_parse(tokens)
                train_productions.extend(tree.productions())

    nltk_induced_grammar = nltk.induce_pcfg(main_grammar.start(), train_productions)

    # Wrap the induced productions in the project's grammar type for export.
    new_grammar = PcfgGrammar(productions=nltk_induced_grammar.productions())

    export_grammars(new_grammar, type)

In [129]:
def set_up():
    """Create the grammar for all song parts and load seven of them.

    Every song part is first induced and exported with ``song_part_grammar``.
    The exported rule and vocab files are then read back with
    ``PcfgGrammar.read_grammar``.

    Returns:
        A 7-tuple ``(intro, verse, chorus, pre_chorus, bridge, hook, outro)``
        of grammars. The ``post-chorus`` grammar is exported but is not
        loaded or returned here.
    """
    grammar_dir = './main-data/grammar/'

    exported_parts = (
        'intro', 'verse', 'chorus', 'pre-chorus',
        'post-chorus', 'hook', 'bridge', 'outro',
    )
    for part in exported_parts:
        song_part_grammar(part)

    def load(part):
        # Read back the rule and vocab files written for this song part.
        return PcfgGrammar.read_grammar(
            grammar_files=[grammar_dir + part + '.gr', grammar_dir + part + '-vocab.gr'],
            allowed_words_file='./main-data/allowed_words.txt',
        )

    loaded_parts = ('intro', 'verse', 'chorus', 'pre-chorus', 'bridge', 'hook', 'outro')
    return tuple(load(part) for part in loaded_parts)

In [130]:
def create_song(i, v1, pc, c, v2, b, h, o):
    """Generate a song as a list of labelled sections.

    Calls ``set_up()`` (which re-exports and reloads the song-part grammars
    on every call) and samples lines for each part with
    ``generate_random_sentences``.

    Args:
        i: Number of intro lines.
        v1: Number of lines in verse 1.
        pc: Number of pre-chorus lines.
        c: Number of chorus lines.
        v2: Number of lines in verse 2.
        b: Number of bridge lines.
        h: Number of hook lines.
        o: Number of outro lines.

    Returns:
        A list of eleven sections, each a list whose first element is a label
        such as ``"intro: "`` followed by that section's lines. The same
        pre-chorus and chorus lines are reused each time those sections
        repeat.
    """
    (intro_grammar, verse_grammar, chorus_grammar, pre_chorus_grammar,
     bridge_grammar, hook_grammar, outro_grammar) = set_up()

    # Sampling order is kept as-is: unseeded calls draw their seed from the
    # shared ``random`` state.
    intro = generate_random_sentences(intro_grammar, num_sentences=i)
    verse1 = generate_random_sentences(verse_grammar, num_sentences=v1)
    pre_chorus = generate_random_sentences(pre_chorus_grammar, num_sentences=pc)
    chorus = generate_random_sentences(chorus_grammar, num_sentences=c)
    verse2 = generate_random_sentences(verse_grammar, num_sentences=v2)
    bridge = generate_random_sentences(bridge_grammar, num_sentences=b)
    hook = generate_random_sentences(hook_grammar, num_sentences=h)
    outro = generate_random_sentences(outro_grammar, num_sentences=o)

    return [
        ["intro: "] + intro,
        ["verse 1: "] + verse1,
        ["pre chorus: "] + pre_chorus,
        ["chorus: "] + chorus,
        ["verse 2: "] + verse2,
        ["pre chorus: "] + pre_chorus,
        ["chorus: "] + chorus,
        ["bridge: "] + bridge,
        ["hook: "] + hook,
        ["chorus: "] + chorus,
        ["outro: "] + outro,
    ]

In [131]:
# Sample song: line counts per section, passed by name.
create_song(i=7, v1=6, pc=4, c=6, v2=6, b=7, h=5, o=5)

[['intro: ',
  'ah-ah-ah , ah-ah-ah',
  'one two three four',
  'one two three four',
  'one two three four',
  'ah',
  'one two three four',
  'ah'],
 ['verse 1: ',
  'but who spent the Christmas ?',
  'all headphones said eighth grade easy',
  'thought they',
  "we are about , 'cause some dances call mad of",
  'my kids',
  "I'm"],
 ['pre chorus: ',
  'kicking up Disney ?',
  'am I bad for friend',
  "or I said she need I don't give a shit making",
  'they wanna go for hit'],
 ['chorus: ',
  "I'm made up , I'm after all talking out",
  "talking up everybody , I'll be there outta you",
  "I'm this great",
  "and I'm fall for next time so it wanna like a scene one",
  "boy I'm this famous to the today",
  "wrong drama 's talking"],
 ['verse 2: ',
  "I'm whole fuckin' real friends drove , I complain not good",
  "your sparks didn't get seriously a finale",
  'I promise and find',
  'a peace feel my mind on Netflix Trip',
  'but are these two habits maybe , hardly',
  'we crossed me'],
 

#### Gustavo's Songs:

Come Hang Out (parsed)

Drama (parsed)

I'm Not Famous (parsed)

Three Thirty (parsed)

Turning Out (parsed)

The Good Part (parsed)

In [132]:
#parsed_trees = parse_file(main_grammar, './main-data/songs/come-hang-out.sen', verbose=True)

In [133]:
#parsed_trees = parse_file(main_grammar, './main-data/songs/drama.sen', verbose=True)

In [134]:
#parsed_trees = parse_file(main_grammar, './main-data/songs/im-not-famous.sen', verbose=True)

In [135]:
#parsed_trees = parse_file(main_grammar, './main-data/songs/three-thirty.sen', verbose=True)

In [136]:
#parsed_trees = parse_file(main_grammar, './main-data/songs/turning-out.sen', verbose=True)

In [137]:
#parsed_trees = parse_file(main_grammar, './main-data/songs/the-good-part.sen', verbose=True)

#### Mckinley's Songs:

In [138]:
#parsed_trees = parse_file(s1_grammar, './main-data/songs/.sen', verbose=True)

In [139]:
# Parse every song-part file with the main grammar; verbose mode prints the
# lines that fail to parse. ``parsed_trees`` ends up holding the result for
# the last file (post-chorus).
for _part in ('intro', 'verse', 'chorus', 'hook', 'outro', 'bridge', 'pre-chorus', 'post-chorus'):
    parsed_trees = parse_file(main_grammar, './main-data/song-parts/' + _part + '.sen', verbose=True)