In [1]:
import time
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt

In [2]:
def unzip(pairs):
    """
    "unzips" of groups of items into separate tuples.
    
    Example: pairs = [("a", 1), ("b", 2), ...] --> (("a", "b", ...), (1, 2, ...))
    
    Parameters
    ----------
    pairs : Iterable[Tuple[Any, ...]]
        An iterable of the form ((a0, b0, c0, ...), (a1, b1, c1, ...))
    
    Returns
    -------
    Tuple[Tuples[Any, ...], ...]
       A tuple containing the "unzipped" contents of `pairs`; i.e. 
       ((a0, a1, ...), (b0, b1, ...), (c0, c1), ...)
    """
    return tuple(zip(*pairs))

In [3]:
path_to_notes = r"C:\Users\g_bab\Downloads\musicdata.txt"

with open(path_to_notes, "rb") as f:
    notes = f.read().decode()
    notes = notes.split()
print(str(len(notes)) + " character(s)")

788 character(s)


In [4]:
print(notes[:30])

['E', 'Ds', 'E', 'Ds', 'E', 'B', 'Db', 'C', 'A', 'C', 'E', 'A', 'B', 'Es', 'G', 'B', 'C', 'E', 'E', 'Ds', 'E', 'Ds', 'E', 'B', 'D', 'C', 'C', 'E', 'A', 'B']


In [5]:
from collections import Counter
counter = Counter(notes)

print(counter)

Counter({'E': 176, 'C': 132, 'A': 127, 'B': 87, 'D': 76, 'G': 63, 'Ds': 49, 'F': 26, 'Bb': 19, 'Gs': 14, 'Es': 6, 'Fs': 6, 'Db': 3, 'Cs': 3, 'Eb': 1})


In [6]:
import string
unique_notes = set(notes)
counts = [(note, cnt) for note, cnt in counter.most_common() if note in unique_notes]
print(counts)

[('E', 176), ('C', 132), ('A', 127), ('B', 87), ('D', 76), ('G', 63), ('Ds', 49), ('F', 26), ('Bb', 19), ('Gs', 14), ('Es', 6), ('Fs', 6), ('Db', 3), ('Cs', 3), ('Eb', 1)]


In [7]:

#would have also been possible to note down how long each note lasted in our "training data" to output "trained" lengths - didn't out of time constraint 
def normalize(counter):
    """ Convert a `note -> count` counter to a list 
    of (note, frequency of note's appearance) pairs, sorted in descending order of 
    frequency.

    Parameters
    -----------
    counter : collections.Counter
        note -> count

    Returns
    -------
    List[Tuple[str, float]]
       A list of tuples - (note, frequency) pairs in order
       of descending-frequency

    Examples
    --------
    >>> from collections import Counter
    >>> note_count = Counter({"a": 1, "b": 3})
    >>> note_count
    Counter({'a': 1, 'b': 3})

    >>> normalize(note_count)
    [('b', 0.75), ('a', 0.25)]
    """
    
    total = sum(counter.values())
    return [(char, cnt/total) for char, cnt in counter.most_common()]

In [8]:
freqs = normalize(counter)

In [9]:
fig, ax = plt.subplots()
labels, values = unzip(freqs)

ax.bar(range(len(counts)), values, align='center', alpha=0.5)
ax.set_xticks(range(len(labels)))
ax.set_xticklabels(labels)

ax.set_ylabel('Frequency')
ax.set_title('Frequency of Notes in E-minor songs')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Frequency of Notes in E-minor songs')

In [28]:
from collections import defaultdict

def train_lm(text, n):
    """ Train character-based n-gram language model.
        
    This will learn: given a sequence of n-1 characters, what the probability
    distribution is for the n-th character in the sequence.

    For example if we train on the text:
        text = "cacao"

    Using a n-gram size of n=3, then the following dict would be returned.
    See that we *normalize* each of the counts for a given history

        {'ac': [('a', 1.0)],
         'ca': [('c', 0.5), ('o', 0.5)],
         '~c': [('a', 1.0)],
         '~~': [('c', 1.0)]}

    Tildas ("~") are used for padding the history when necessary, so that it's 
    possible to estimate the probability of a seeing a character when there 
    aren't (n - 1) previous characters of history available.

    So, according to this text we trained on, if you see the sequence 'ac',
    our model predicts that the next character should be 'a' 100% of the time.

    For generating the padding, recall that Python allows you to generate 
    repeated sequences easily: 
       `"p" * 4` returns `"pppp"`

    Parameters
    -----------
    text: str 
        A string (doesn't need to be lowercased).
        
    n: int
        The length of n-gram to analyze.

    Returns
    -------
    Dict[str, List[Tuple[str, float]]]
        
        {n-1 history -> [(letter, normalized count), ...]}
        
        A dictionary that maps histories (strings of length (n-1)) to lists of (char, prob) 
        pairs, where prob is the probability (i.e frequency) of char appearing after 
        that specific history.

    Examples
    --------
    >>> train_lm("cacao", 3)
    {'ac': [('a', 1.0)],
     'ca': [('c', 0.5), ('o', 0.5)],
     '~c': [('a', 1.0)],
     '~~': [('c', 1.0)]}
    """
    # <COGINST>
    text = text.split()
    raw_lm = defaultdict(Counter)
    
    history = []
    for i in range(n-1):
        history.append("~")
    
    
    #count = Counter(text)
    
    # count number of times characters appear following different histories
    # `raw_lm`: {history -> Counter}
    for char in text:
        raw_lm[str(history)][char] += 1
        # slide history window to the right by one character
        history = history[1:]
        history.append(char)
    print(raw_lm)
    # create final dictionary, normalizing the counts for each history
    lm = {history : normalize(counter) for history, counter in raw_lm.items()}
    
    return lm

In [29]:
def generate_note(lm, history):
    """ Randomly picks letter according to probability distribution associated with 
    the specified history, as stored in your language model.

    Note: returns dummy character "~" if history not found in model.

    Parameters
    ----------
    lm: Dict[str, List[Tuple[str, float]]] 
        The n-gram language model. 
        I.e. the dictionary: history -> [(char, freq), ...]

    history: str
        A string of length (n-1) to use as context/history for generating 
        the next character.

    Returns
    -------
    str
        The predicted character. '~' if history is not in language model.
    """
    # <COGINST>
    
    
    if not str(history) in lm:
        return "~"
    
    notes, probs = unzip(lm[str(history)])
    
    i = np.random.choice(notes, p=probs)
    return i

In [30]:
notes_to_sound_freqs = {"A":220, "As":233.08, "Bb":233.08, "B":246.94,"C":261.63,"Cs":277.18,"Db":277.18,"D":293.66,"Ds":311.13,"Eb":311.13,"E":329.63, "Es":349.23,"F":349.23,"Fs":369.99,"Gb":369.99,"G":392.00,"Gs":415.30,"Ab":415.30}

def generate_music(lm, n, nnotes=100):
    """ Randomly generates `nnotes` of text by drawing from 
    the probability distributions stored in a n-gram language model 
    `lm`.

    Parameters
    ----------
    lm: Dict[str, List[Tuple[str, float]]]
        The n-gram language model. 
        I.e. the dictionary: history -> [(char, freq), ...]
    
    n: int
        Order of n-gram model.
    
    nletters: int
        Number of letters to randomly generate.

    Returns
    -------
    str
        Model-generated text. Should contain `nletters` number of
        generated characters. The pre-pended ~'s are not to be included. 
    """
    # <COGINST>
    history = []
    for i in range(n-1):
        history.append("~")

    print(type(history))
    note_names = []
    frequencies = []
    for i in range(nnotes):
        #string_hist = str(history)
        c = generate_note(lm, history)
        note_names+=c
        #string_hist+=" "
        #string_hist+=c
        #history = string_hist.split()
        history+=c
        history = history[1:]
    print(note_names)    
    for i in note_names:
        if i != "~" and i!="s" and i!="b":
            frequencies.append(notes_to_sound_freqs[i])
    return frequencies 

In [37]:
path_to_notes = r"C:\Users\g_bab\Downloads\musicdata.txt"

with open(path_to_notes, "rb") as f:
    notes = f.read().decode()

In [38]:
lm5 = train_lm(notes, 2)
#print(lm5)
frequencies = generate_music(lm5, 2, 20)

defaultdict(<class 'collections.Counter'>, {"['~']": Counter({'E': 1}), "['E']": Counter({'Ds': 45, 'D': 32, 'E': 26, 'A': 24, 'B': 21, 'G': 14, 'Gs': 6, 'C': 5, 'F': 3}), "['Ds']": Counter({'E': 46, 'D': 3}), "['B']": Counter({'A': 19, 'C': 17, 'D': 16, 'E': 15, 'G': 8, 'Es': 6, 'Db': 3, 'Bb': 3}), "['Db']": Counter({'C': 3}), "['C']": Counter({'E': 49, 'B': 29, 'A': 21, 'D': 17, 'C': 5, 'Bb': 4, 'G': 4, 'F': 2}), "['A']": Counter({'C': 48, 'B': 22, 'A': 16, 'G': 14, 'Bb': 11, 'E': 9, 'Gs': 7}), "['Es']": Counter({'G': 6}), "['G']": Counter({'A': 29, 'B': 8, 'F': 8, 'Fs': 6, 'G': 4, 'C': 3, 'D': 3, 'E': 2}), "['D']": Counter({'C': 45, 'E': 11, 'G': 7, 'Ds': 4, 'F': 3, 'Cs': 3, 'B': 1, 'A': 1, 'Bb': 1}), "['F']": Counter({'E': 17, 'F': 4, 'A': 2, 'D': 2, 'Eb': 1}), "['Bb']": Counter({'A': 14, 'C': 3, 'D': 2}), "['Gs']": Counter({'B': 6, 'G': 6, 'Gs': 1, 'A': 1}), "['Eb']": Counter({'D': 1}), "['Fs']": Counter({'F': 6}), "['Cs']": Counter({'C': 3})})
<class 'list'>
['E', 'G', 'C', 'A', 

In [39]:
def pressure(times: np.ndarray, *, amp: float, freqencies: float) -> np.ndarray:
    """Describes the temporal waveform of a pure tone impinging on a 
    microphone at times `times` (an array of times). The wave has 
    an amplitude `amp`, measured in Pascals, and a frequency 
    `freq`, measured in Hz.
    
    Parameters
    ----------
    times : numpy.ndarray, shape=(N,)
        The times at which we want to evaluate the sound wave
    
    amp : float
        The wave's amplitude (measured in Pascals - force per unit area)
    
    freq : float
        The wave's frequency (measured in Hz - oscillations per second)
    
    Returns
    -------
    numpy.ndarray, shape=(N,)
        The pressure at the microphone at times `t`

    Notes
    -----
    We only care about the wave at a fixed location, at the microphone, 
    which is why we do not have any spatial component to our wave. 
    """
    # <COGINST>
    final_list = []
    for freq in frequencies:
        final =list((amp * np.sin(2 * np.pi * freq * times)))
        final_list+=final
        
    finalarray = np.array(final_list)
    print(finalarray.shape)
    return finalarray

In [40]:
amplitude = 0.01  # Pascals
duration = 0.4 # seconds
sampling_rate = 44100 # Hz
n_samples = int(duration * sampling_rate) + 1

times = np.arange(n_samples) / sampling_rate  # seconds

print(frequencies[:20])

final_array = pressure(times, amp=amplitude, freqencies=frequencies)  # Pascals

#print(final_array[:20])

[329.63, 392.0, 261.63, 220, 246.94, 329.63, 293.66, 261.63, 293.66, 261.63, 329.63, 246.94, 220, 220, 246.94, 261.63, 293.66, 261.63, 329.63, 329.63]
(352820,)


In [41]:
from IPython.display import Audio
Audio(final_array, rate=sampling_rate)