In [1]:
from bs4 import BeautifulSoup
import requests
import random
from tqdm import tqdm

# Musical keys this notebook samples from when requesting random tabs.
MAIN_KEYS = ["A", "Ab", "B", "Bb", "C", "C#", "D", "E", "Eb", "F", "F#", "G", "Gb"]

def get_random_key():
    """Return one entry of MAIN_KEYS chosen uniformly at random."""
    # random.choice states the intent directly and removes the manual
    # `len(...) - 1` index arithmetic.
    return random.choice(MAIN_KEYS)

def get_random_tab(key="C", measures=8, timeout=30):
    """Request a randomly generated guitar tab page and return its HTML.

    Args:
        key: Value sent as the ``key`` query parameter.
        measures: Value sent as the ``measures`` query parameter.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` may block indefinitely on a stalled connection,
            which would hang a long dataset-generation loop.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx HTTP status.
    """
    url = "https://www.asciitabs.com/random"
    params = {
        "instrument": "guitar",
        "measures": measures,
        "allow-open-strings": "false",
        "single-notes-only": "false",
        "key": key,
        "maxfret": '16',
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Fail here with a clear HTTP error instead of handing an error page
    # to the HTML parser downstream.
    response.raise_for_status()
    return response.text


def transform_ascii_tab(ascii_tab: str) -> str:
    """Merge a multi-system ASCII tab into one row per guitar string.

    Every non-blank line whose first character is one of ``e B G D A E`` has
    its first two characters dropped and the remainder appended to that
    string's row. Other lines are ignored.

    Returns:
        Six newline-separated rows in the order e, B, G, D, A, E, each
        formatted as ``<string>|<concatenated content>``.
    """
    segments_by_string = {name: [] for name in "eBGDAE"}

    for raw_line in ascii_tab.split('\n'):
        if not raw_line.strip():
            continue
        # The first character identifies the string; unknown labels are skipped.
        bucket = segments_by_string.get(raw_line[0])
        if bucket is not None:
            bucket.append(raw_line[2:])

    rows = []
    for name, segments in segments_by_string.items():
        rows.append(name + "|" + "".join(segments))
    return "\n".join(rows)


def extract_tab_from_html(html_str:str):
    """Extract the guitar tab from a page and return it as six string rows.

    Args:
        html_str: HTML document expected to contain a
            ``<div class="tab-container">`` holding the ASCII tab.

    Returns:
        The tab text after ``transform_ascii_tab`` has merged it into one
        row per string.

    Raises:
        ValueError: If the page has no ``tab-container`` div (for example an
            error page), instead of an opaque ``AttributeError`` on ``None``.
    """
    soup = BeautifulSoup(html_str, 'html.parser')
    tab_container = soup.find('div', class_='tab-container')
    if tab_container is None:
        raise ValueError("no <div class='tab-container'> found in the HTML")

    # Replace every <span> by its whitespace-stripped text.
    for span in tab_container.find_all('span'):
        span.replace_with(span.text.strip())

    # Strip each line so the string label sits in column 0 for the transform.
    guitar_tab = '\n'.join(line.strip() for line in tab_container.text.split('\n'))

    return transform_ascii_tab(guitar_tab)

def generate_random_tab(key=None, measures=8):
    """Fetch a random tab and return it as six newline-separated string rows.

    Args:
        key: Musical key to request; a random entry of MAIN_KEYS is used
            when it is ``None``.
        measures: Number of measures to request.

    Returns:
        The tab text produced by ``extract_tab_from_html``.
    """
    # Identity comparison is the correct test for None (PEP 8).
    if key is None:
        key = get_random_key()
    html_tab = get_random_tab(measures=measures, key=key)
    return extract_tab_from_html(html_tab)


In [35]:
# Smoke test: fetch a 4-measure tab in the key of C and print its six string rows.
print(generate_random_tab("C", measures=4))


e|-----------------|---------------|---------------|--------5-------|
B|------5----------|-----1--3------|-----8---------|--------12--13--|
G|---------10------|---------------|--9-----16-----|--2--9----------|
D|--12---------14--|-----------14--|---------------|--------14------|
A|--5------7-------|---------------|---------------|-----2----------|
E|---------3-------|--3-----3------|---------------|--------13------|


In [55]:
def str_table_to_tab(table:str):
    """Parse a six-row text tab into a list of beats.

    Each row is split on ``|``; the first piece (the string label) and the
    last piece (whatever follows the final ``|``) are discarded. The code
    calls each remaining ``|``-delimited segment a "beat".

    Returns:
        A list with one entry per segment of the first row; each entry is a
        list of six strings, one per row, taken from the first six rows.
    """
    rows = [row.split("|")[1:-1] for row in table.strip().split('\n')]
    # Transpose from row-major (string, beat) to beat-major (beat, string).
    return [
        [rows[string_idx][beat_idx] for string_idx in range(6)]
        for beat_idx in range(len(rows[0]))
    ]
    
def tab_to_str(t, add_string_keys=True):
    """Render a list of beats back into six text rows.

    Args:
        t: List of beats, each indexable by string position 0..5.
        add_string_keys: When true, every row starts with its string label
            (``e B G D A E``) followed by ``|``.

    Returns:
        Six rows, each terminated by a newline, with ``|`` after every beat.
    """
    rendered_rows = []
    for string_idx, label in enumerate(['e', 'B', 'G', 'D', 'A', 'E']):
        prefix = label + "|" if add_string_keys else ""
        cells = "".join(beat[string_idx] + "|" for beat in t)
        rendered_rows.append(prefix + cells + "\n")
    return "".join(rendered_rows)


# System message placed first in every chat example built by generate_messages.
# The text is part of the training data, so it must stay identical between
# dataset generation and inference.
SYSTEM_PROMPT = """
You are a skilled guitar instructor and music theory expert. Generate the next four beats of a guitar tab based on the user's input.

1. Review the first four beats and key provided by the user.
2. Generate the next four beats that fit the specified key.
"""

# Template for the user turn; filled with str.format(key=..., tab=...).
USER_PROMPT = """
Key: {key}

Tab: {tab}
"""



def generate_messages(key:str, table:str):
    """Build one system/user/assistant chat example from a text tab.

    The tab is parsed into beats and cut at the midpoint: the first half is
    formatted into the user prompt together with ``key``, and the second
    half becomes the assistant's answer.

    Returns:
        A list of three ``{"role", "content"}`` dicts.
    """
    beats = str_table_to_tab(table)
    midpoint = len(beats) // 2

    prompt_half = tab_to_str(beats[:midpoint], add_string_keys=True)
    answer_half = tab_to_str(beats[midpoint:], add_string_keys=True)

    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT.format(key=key, tab=prompt_half)},
        {"role": "assistant", "content": answer_half},
    ]
    
    
def generare_dataset(ds_len:int):
    """Return ``ds_len`` chat examples, each built from a freshly fetched tab.

    Every example uses a random key and the default tab length of
    ``generate_random_tab``. Progress is shown with tqdm.
    """
    def _one_example():
        chosen_key = get_random_key()
        return generate_messages(chosen_key, generate_random_tab(key=chosen_key))

    return [_one_example() for _ in tqdm(range(ds_len))]

In [4]:
# Quick check with a single example; the call fetches a tab over HTTP.
generare_dataset(1)

100%|██████████| 1/1 [00:00<00:00,  2.12it/s]


[[{'role': 'system',
   'content': "\nYou are a skilled guitar instructor and music theory expert. Generate the next four beats of a guitar tab based on the user's input.\n\n1. Review the first four beats and key provided by the user.\n2. Generate the next four beats that fit the specified key.\n"},
  {'role': 'user',
   'content': '\nKey: C#\n\nTab: e|-------------2--|----------------|------------4---|--9----------12--|\nB|------9---------|------5-----14--|--4-----2---12--|------12-----10--|\nG|----------------|--13------------|----------------|--9--------------|\nD|----------------|------------6---|-----2--9-------|--16--9----------|\nA|---------14-----|--9---4---------|--0---------14--|--12------9------|\nE|--16------------|--11------------|--------11------|--11--4------16--|\n\n'},
  {'role': 'assistant',
   'content': 'e|----------------|--7-------------|------7---14-----|-----12--7---7---|\nB|--12---------9--|--14--5-----16--|--2--------------|---------10------|\nG|----------1--2

In [59]:
# Fetch one long tab and cut it into consecutive 8-measure windows;
# each window becomes one chat example.
key = get_random_key()
table = generate_random_tab(key=key, measures=96)
tab_table = str_table_to_tab(table)
for i in range(0, len(tab_table), 8):
    # Re-serialise the window because generate_messages takes the text form.
    tab = tab_to_str(tab_table[i:i+8])
    print(generate_messages(key, tab))

[{'role': 'system', 'content': "\nYou are a skilled guitar instructor and music theory expert. Generate the next four beats of a guitar tab based on the user's input.\n\n1. Review the first four beats and key provided by the user.\n2. Generate the next four beats that fit the specified key.\n"}, {'role': 'user', 'content': '\nKey: E\n\nTab: e|---------------|--14------------|--------------|--0---------7---|\nB|---------------|--9---2--9--12--|-----9--------|----------------|\nG|--2---------2--|------------16--|--------------|--9-------------|\nD|-----9------4--|--4------1------|--------------|--9-----13--14--|\nA|---------------|------9-----14--|-----0--------|--9---------12--|\nE|--------11-----|----------------|--------7--5--|-----4----------|\n\n'}, {'role': 'assistant', 'content': 'e|--7---12---------|-----7--16------|------------------|------2--------|\nB|----------16-----|------------10--|------16----------|--16--9--------|\nG|-----------------|-----9------13--|--16------9---11--

In [53]:
# NOTE(review): relies on a global `tab` left by whichever cell ran before it
# (hidden state); the recorded result was 96.
len(str_table_to_tab(tab))

96

In [60]:
import json
def generare_dataset(ds_len:int):
    """Build the fine-tuning dataset and write it to ``dataset.jsonl``.

    For each of ``ds_len`` iterations a 96-measure tab is fetched in a random
    key and cut into 8-measure windows; every window is turned into one chat
    example by ``generate_messages`` and written as one JSON line.

    An iteration that fails (network error, unparsable page, malformed tab)
    is skipped so one bad response does not abort a long run, but the failure
    is reported instead of being silently swallowed.

    Note: this redefines the earlier list-returning function of the same name.

    Args:
        ds_len: Number of tabs to request.
    """
    failures = 0
    with open("dataset.jsonl", "w") as f:
        for _ in tqdm(range(ds_len)):
            try:
                key = get_random_key()
                table = generate_random_tab(key=key, measures=96)
                tab_table = str_table_to_tab(table)
                # Serialise every window before writing anything, so a failure
                # part-way through cannot leave a truncated line in the file.
                rows = [
                    json.dumps(generate_messages(key, tab_to_str(tab_table[i:i+8])))
                    for i in range(0, len(tab_table), 8)
                ]
            except Exception as exc:
                # `Exception` rather than a bare except, so a keyboard/kernel
                # interrupt still stops the run.
                failures += 1
                tqdm.write(f"skipped one tab: {exc!r}")
                continue
            f.writelines(row + "\n" for row in rows)
    if failures:
        print(f"{failures} of {ds_len} tabs were skipped because of errors")


# Full run: 8000 requests of 96 measures each, written to dataset.jsonl.
# The recorded run took about 1 h 27 min.
generare_dataset(8000)

100%|██████████| 8000/8000 [1:27:16<00:00,  1.53it/s]


In [68]:
# load the dataset and split it into train and test

import random
import json
def split_train_test(dataset_path:str, train_ratio:float=0.9,
                     train_path:str="train.jsonl", test_path:str="test.jsonl",
                     seed=None):
    """Shuffle a JSONL dataset and write a train/test split.

    Args:
        dataset_path: JSONL file with one JSON document per line. Blank
            lines (for example a trailing newline) are ignored.
        train_ratio: Fraction of the examples written to the train file.
        train_path: Output path for the train split.
        test_path: Output path for the test split.
        seed: When given, the shuffle uses a private ``random.Random(seed)``
            so the split is reproducible; when ``None`` the module-level
            generator is used, as before.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio!r}")

    with open(dataset_path) as f:
        # json.loads would raise on an empty line, so skip those.
        dataset = [json.loads(line) for line in f if line.strip()]

    if seed is None:
        random.shuffle(dataset)
    else:
        random.Random(seed).shuffle(dataset)

    split = int(len(dataset)*train_ratio)
    train, test = dataset[:split], dataset[split:]

    for path, rows in ((train_path, train), (test_path, test)):
        with open(path, "w") as f:
            for row in rows:
                f.write(json.dumps(row) + "\n")
    print(f"Train size: {len(train)}, Test size: {len(test)}")

# Split the generated dataset; writes train.jsonl and test.jsonl in the working directory.
split_train_test("dataset.jsonl")



Train size: 86400, Test size: 9600


In [69]:
import os
import json
# Load credentials from a local JSON file and export them as environment
# variables, which later cells read back.
# The key is deliberately NOT printed: cell outputs are saved with the
# notebook, so printing would leak the secret to anyone who can read the file.
with open("api_keys.json") as f:
    api_keys = json.load(f)
os.environ["WANDB_API_KEY"] = api_keys["WANDB_API_KEY"]
os.environ["MISTRAL_API_KEY"] = api_keys["MISTRAL_API_KEY"]

[REDACTED]


In [70]:
from mistralai.client import MistralClient

# Build the Mistral client from the MISTRAL_API_KEY environment variable.
# The key is not echoed, because notebook outputs are stored in the file.
api_key = os.environ.get("MISTRAL_API_KEY")
client = MistralClient(api_key=api_key)


[REDACTED]


In [71]:

# Upload both splits through the client; the returned objects expose the
# `.id` values that the fine-tuning job cell passes as training/validation files.
with open("train.jsonl", "rb") as f:
    guitar_generator_train = client.files.create(file=("guitar_generator_train.jsonl", f))
with open("test.jsonl", "rb") as f:
    guitar_generator_eval = client.files.create(file=("guitar_generator_eval.jsonl", f))

In [72]:
from mistralai.models.jobs import WandbIntegrationIn, TrainingParameters

# Read the Weights & Biases key from the environment.
wandb_api_key = os.environ.get("WANDB_API_KEY")

# Launch a fine-tuning job on open-mistral-7b using the uploaded train/eval files.
created_jobs = client.jobs.create(
    model="open-mistral-7b",
    training_files=[guitar_generator_train.id],
    validation_files=[guitar_generator_eval.id],
    hyperparameters=TrainingParameters(
        training_steps=300,
        learning_rate=0.0001,
    ),
    integrations=[
        # The integration is passed as a plain dict (note the .dict() call).
        WandbIntegrationIn(
            project="Fine-tuning",
            run_name="Second run",
            api_key=wandb_api_key,
        ).dict()
    ]
)

In [73]:
# NOTE(review): assumes jobs.data[0] is the job created above — confirm the listing order.
jobs = client.jobs.list()
print(jobs.data[0].id)

b5caa7c6-d99f-4fa2-a6ce-f000998e306a


In [74]:
# Fetch the job record for the first listed job; the recorded output shows
# its status, fine-tuned model name and checkpoints.
retrieved_jobs = client.jobs.retrieve(jobs.data[0].id)
print(retrieved_jobs)


id='b5caa7c6-d99f-4fa2-a6ce-f000998e306a' hyperparameters=TrainingParameters(training_steps=300, learning_rate=0.0001) fine_tuned_model='ft:open-mistral-7b:7e80780e:20240629:b5caa7c6' model='open-mistral-7b' status='SUCCESS' job_type='FT' created_at=1719667978 modified_at=1719669853 training_files=['0985dd75-72ce-4f87-bb2c-bfc281c35050'] validation_files=['477b195b-23d4-4b0b-b690-136f69cc4e1d'] object='job' integrations=[WandbIntegration(type='wandb', project='Fine-tuning', name=None, run_name='Second run')] events=[Event(name='status-updated', data={'status': 'SUCCESS'}, created_at=1719669853), Event(name='status-updated', data={'status': 'RUNNING'}, created_at=1719667979), Event(name='status-updated', data={'status': 'QUEUED'}, created_at=1719667978)] checkpoints=[Checkpoint(metrics=Metric(train_loss=0.90281, valid_loss=0.904977, valid_mean_token_accuracy=1.872515), step_number=300, created_at=1719669810), Checkpoint(metrics=Metric(train_loss=0.90722, valid_loss=0.908142, valid_mean_

In [75]:
# Fetch a 4-measure tab in A; this rebinds the global `tab`.
tab = generate_random_tab("A", measures=4)

In [98]:
# Name of the fine-tuned model; the same value is hard-coded in guitarstral_inference.
retrieved_jobs.fine_tuned_model

'ft:open-mistral-7b:7e80780e:20240629:b5caa7c6'

In [108]:
from mistralai.models.chat_completion import ChatMessage
import os
import json
# Load the Mistral key from the local credentials file. The file is closed
# deterministically, and the key is never printed because cell outputs are
# saved with the notebook.
with open("api_keys.json") as f:
    api_keys = json.load(f)
os.environ["MISTRAL_API_KEY"] = api_keys["MISTRAL_API_KEY"]

from mistralai.client import MistralClient

api_key = os.environ.get("MISTRAL_API_KEY")
client = MistralClient(api_key=api_key)


# Same prompt text as the constants defined in the dataset-generation cell;
# guitarstral_inference sends it unchanged to the fine-tuned model.
SYSTEM_PROMPT = """
You are a skilled guitar instructor and music theory expert. Generate the next four beats of a guitar tab based on the user's input.

1. Review the first four beats and key provided by the user.
2. Generate the next four beats that fit the specified key.
"""

# Template for the user turn; filled with str.format(key=..., tab=...).
USER_PROMPT = """
Key: {key}

Tab: {tab}
"""

def correct_table(table:str):
    """Re-align a text tab so all six strings of a beat have equal width.

    The tab is parsed into beats; within each beat every cell is left-padded
    with ``-`` (``str.rjust``) up to the widest cell of that beat, and the
    result is rendered back to text with string labels.
    """
    beats = str_table_to_tab(table)
    for beat in beats:
        width = max(len(cell) for cell in beat)
        # Pad in place so the rows line up again.
        beat[:] = [cell.rjust(width, "-") for cell in beat]
    return tab_to_str(beats)

def guitarstral_inference(tab:str, key:str,
                          model:str='ft:open-mistral-7b:7e80780e:20240629:b5caa7c6'):
    """Ask the fine-tuned model to continue a tab and return the aligned result.

    Args:
        tab: Text tab inserted into the user prompt.
        key: Musical key inserted into the user prompt.
        model: Model identifier sent to the chat endpoint. Defaults to the
            fine-tuned model previously hard-coded here, so existing calls
            behave the same while other checkpoints can be tried without
            editing the function.

    Returns:
        The first choice of the model's reply, passed through ``correct_table``.
    """
    user_messages = USER_PROMPT.format(key=key, tab=tab)
    chat_response = client.chat(
        model=model,
        messages=[
            ChatMessage(role='system', content=SYSTEM_PROMPT),
            ChatMessage(role='user', content=user_messages),
        ],
    )
    # The reply may have ragged beat widths; correct_table pads them.
    return correct_table(chat_response.choices[0].message.content)

def generare_table(table:str, key:str, nb_beats:int):
    """Extend a tab by repeatedly asking the model for a continuation.

    One inference call is made for every step of ``range(0, nb_beats, 4)``.
    Each call receives only the previous call's output (the initial tab for
    the first call) as context, and its parsed beats are appended.

    Returns:
        The beats of the initial tab followed by all generated beats.
    """
    all_beats = str_table_to_tab(table)
    context = table
    for _ in range(0, nb_beats, 4):
        context = guitarstral_inference(context, key)
        all_beats.extend(str_table_to_tab(context))
    return all_beats

# Fetch a 4-measure seed tab in A and extend it; with nb_beats=12 the loop in
# generare_table runs three times.
tab = generate_random_tab("A", measures=4)

t = generare_table(tab, "A", 12)

In [109]:
# Render the seed tab plus the generated beats as six text rows.
print(tab_to_str(t))

e|----------7-----|---------------|--10------------|---------7------|--12--10--10--16--|-----------------|-----7----------|-----4--7--12--|----------------|----------------|---------------|--0-------------|---------------|-------------9---|----------------|--10-------------|
B|--10------------|---------------|------0--2------|----------------|----------10------|--10---------15--|----------------|---------------|-----12-----12--|-----12---------|---------------|----------------|---------------|------0----------|---------9--9---|------10--7------|
G|--9---6---------|--2------------|----------------|--14-----6------|------2-------7---|--9-------1------|-----11--11-----|---------------|----------------|---------2--1---|---------------|--2--4----------|-----6---------|-----------------|----------------|--7--------------|
D|--14---------9--|--7------------|---------14-----|--16------------|--11--14------12--|------12---------|-----11--11-----|---------------|-----11---------|--7------2--14--

In [103]:
# Scratch check: range(0, 5, 4) yields two values (0 and 4), so a step-4 loop over 5 runs twice.
for i in range(0, 5, 4):
    print(i)

0
4


In [79]:
# NOTE(review): `chat_response` is only a local inside guitarstral_inference in
# the visible cells; this cell depends on a global that no visible cell defines.
print(chat_response.choices[0].message.content)

e|-----16---------|------12--16-----|--------2-----|--10------------|
B|-----9----------|----------14--3--|-----7--------|---------------|
G|--2------16-----|-----------------|--------------|---------------|
D|---------9------|--16-------------|--------------|---------------|
A|--7--16---------|------0----------|--------0-----|--12--4-----0--|
E|-----14--16--7--|-----------------|--------------|---------------|



In [39]:
# Show whatever the global `tab` currently holds (rebinds happen in several cells).
print(tab)

e|------9----------|-----2----------|--7------16--4--|---------16------|
B|---------2-------|-----9----------|-----2-------9--|-----10----------|
G|--13-------------|-----14---------|---------4------|-----9-------14--|
D|-----------------|----------------|--9----------2--|--7--------------|
A|------4------14--|------------14--|--9--4-------5--|-------------12--|
E|---------14------|----------------|-----14---------|--2--------------|


In [95]:



# Align the sample tab held in the global `test`, which is assigned in other cells.
print(correct_table(test))

e|----------------|-----------------|----14------1-------|-------------14--|
B|-------9-----0--|-----------------|----6---14------4---|--9--0-----------|
G|----------4-----|---4---8---------|--15--------10--11--|-------------8---|
D|----------3-----|-------16--1-----|------------1---6---|-----------------|
A|----------------|---11------------|----------------6---|---------14------|
E|----1--------9--|-----------------|--------16--7-------|-----13------4---|



In [91]:
# Sample tab for manual experiments. The rows are not all the same width
# (compare the last measure of the e and B strings).
test = """
e|-----10--9------|----------------|------------16--|----5--14---------|
B|------------10--|-------------3--|-------------7--|------7------15--|
G|------------14--|------11--------|-------------1--|------------------|
D|------------11--|--12------7--9--|--7---11-----2--|---2--------------|
A|-----4------12--|----------2--0--|--2---16--------|----------12--9---|
E|-----16---------|------5---------|--12--10--------|------12------12--|"""

In [81]:
from tabs_MIDI.read_tabs_app import Tabs
from tabs_MIDI.midi_generator import Track
import os
import time

def convert_to_midi(tablature:str, tempo):
    """Convert a text tab to a MIDI file and play it with ``timidity``.

    Args:
        tablature: Tab text; it is split on newlines and handed to ``Tabs``.
        tempo: Converted with ``int()`` and passed to ``Track``.

    The MIDI file is written to the working directory as
    ``tab_<timestamp>.mid``. Nothing is returned.
    """
    tabs = Tabs(tablature.split("\n"))
    # The three calls run in this fixed order before `tabs.a` is read.
    tabs.preprocess()
    tabs.displayTabs()
    tabs.convertNotes()

    midi_path = f"./tab_{time.time()}.mid"
    Track(int(tempo)).midiGenerator(tabs.a, path=midi_path)

    os.system(f"timidity {midi_path}")
    return

In [82]:
# Convert and play the sample tab at tempo 120.
convert_to_midi(test, 120)




-------10---9------------------------------------16--------5--14-----------
--------------10-----------------3----------------7---------7-------15----
--------------14----------11-----------------------1----------------------
--------------11------12-------7--9-----7---11------2------2---------------
-------4------12--------------2--0-----2---16--------------------12---9----
-------16-----------------5------------12---10----------------12-------12----
Playing ./tab_1719767107.8283079.mid
MIDI file: ./tab_1719767107.8283079.mid
Format: 1  Tracks: 2  Divisions: 960
_/tab_1719767107_8283079.ogg: No such file or directory
Playing time: ~8 seconds
Notes cut: 0
Notes lost totally: 0


In [34]:
# NOTE(review): depends on a global `chat_response` that no visible cell defines;
# the recorded run of this cell ended in an IndexError.
convert_to_midi(chat_response.choices[0].message.content, 120)




-------10---9--------------------------------------16------5--14-----------
--------------10-----------------3----------------7--------7-------15----

IndexError: list index out of range

In [94]:
# Sample tab whose rows have differing widths; this rebinds the global `test`.
test="""e|--------------|----------------|--14------1-------|-------------14--|
B|-----9-----0--|----------------|--6---14------4---|--9--0-----------|
G|--------4-----|--4---8---------|--15--------10--11--|-------------8---|
D|--------3-----|------16--1-----|-----------1---6---|-----------------|
A|----------------|---11------------|---------------6---|---------14------|
E|--1--------9--|----------------|------16--7-------|-----13------4---|
"""

In [None]:
def str_tabs_to_tab(tabs:str):
    """Parse a six-row text tab into a list of beats.

    Each row is split on ``|``; the first and last pieces are discarded. The
    result has one entry per remaining segment of the first row, each a list
    of six strings taken from the first six rows.
    """
    per_string = []
    for text_row in tabs.strip().split('\n'):
        per_string.append(text_row.split("|")[1:-1])

    beats = []
    for column in range(len(per_string[0])):
        beats.append([per_string[row][column] for row in range(6)])
    return beats
    
# Parse the sample tab; this rebinds the global `t`.
t = str_tabs_to_tab(test)

In [None]:
def tab_to_str(t, add_string_keys=True):
    """Render a list of beats back into six text rows.

    This is a second definition of ``tab_to_str``: when the notebook runs
    top to bottom it replaces the earlier one. It therefore accepts the same
    ``add_string_keys`` argument, so callers that pass it (such as
    ``generate_messages``) keep working.

    Args:
        t: List of beats, each indexable by string position 0..5.
        add_string_keys: When true (the default, matching this function's
            previous behaviour), each row starts with its string label and ``|``.

    Returns:
        Six rows, each terminated by a newline, with ``|`` after every beat.
    """
    keys = ['e', 'B', 'G', 'D', 'A', 'E']
    rows = []
    for i, label in enumerate(keys):
        prefix = label + "|" if add_string_keys else ""
        rows.append(prefix + "".join(beat[i] + "|" for beat in t) + "\n")
    return "".join(rows)

# Render every beat of `t` except the first two.
print(tab_to_str(t[2:]))

In [None]:
# Convert and play the sample tab at tempo 90.
convert_to_midi(test, 90)