<a href="https://colab.research.google.com/github/asigalov61/Torah-Markovify/blob/main/Torah_Markovify.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Markovify Torah (RUS) (ver 1.0)

***

## Based upon the absolutely amazing markovify package by @jsvine: https://github.com/jsvine/markovify

***

### Project Los Angeles
### Tegridy Code 2021

***

# Setup environment

In [None]:
#@title Install dependencies
!git clone https://github.com/asigalov61/tegridy-tools
!pip install unidecode
!pip install tqdm

In [None]:
#@title Load needed modules
print('Loading needed modules. Please wait...')

# Standard-library modules.
import sys
import os
import json
import secrets

# TMIDI and markovify are imported while the working directory is the cloned
# tegridy-tools folder. Presumably both modules ship inside that repository
# (the install cell does not pip-install them) — TODO confirm.
os.chdir('/content/tegridy-tools/tegridy-tools/')
import TMIDI
import markovify
# Return to /content once the imports from the cloned folder are done.
os.chdir('/content/')

from pprint import pprint

import tqdm.auto

# Helpers from the google.colab package (Google Colab runtime).
from google.colab import output, drive

# Make sure the dataset folder exists; exist_ok=True makes the call a no-op
# when the directory is already there, so the cell can be re-run safely
# without a separate existence check.
print('Creating Dataset dir...')
os.makedirs('/content/Dataset', exist_ok=True)

# Leave the notebook in /content for the cells that follow.
os.chdir('/content/')
print('Loading complete. Enjoy! :)')

# Load the Torah text dataset into memory

In [None]:
#@title Download Torah Text (Russian)
!wget 'https://github.com/asigalov61/Torah-Markovify/raw/main/Torah-Rus.txt'

In [None]:
#@title Load/Reload processed TXT dataset
full_path_to_TXT_dataset = "/content/Torah-Rus.txt" #@param {type:"string"}

print('Loading TXT MIDI dataset. Please wait...')
# The dataset is Russian text, so the encoding is given explicitly instead of
# relying on the platform's locale default.
# NOTE(review): assumes the file is UTF-8 encoded — confirm.
with open(full_path_to_TXT_dataset, encoding='utf-8') as f:
    # The whole file is kept in memory as one string for the training cell.
    text = f.read()
print('Dataset loaded! Enjoy :)')

# Train TXT Markov chain/model

In [None]:
#@title Train Markov-chain/model
markov_chain_state_size = 9 #@param {type:"slider", min:1, max:10, step:1}

# Build the chain from the loaded text, using the state size chosen above.
print('Training Markov chain/model. Please wait...')
markov_text_model = markovify.NewlineText(
    text,
    state_size=markov_chain_state_size,
    well_formed=False,
)

# Compile the freshly built model in place.
print('Compiling model...')
markov_text_model.compile(inplace=True)

print('Model is ready! Enjoy :)')

In [None]:
#@title Save the model
full_path_to_json_save_file = "/content/Torah-Russian-Model.json" #@param {type:"string"}

# Serialize the trained model.
print('Converting model to json...')
model_json = markov_text_model.to_json()

# The value returned by to_json() is itself JSON-encoded once more before it
# is written; the loading cell reverses this with json.load followed by
# from_json.
print('Saving model as json file...')
with open(full_path_to_json_save_file, 'w') as json_file:
    json_file.write(json.dumps(model_json))

print('Task complete! Enjoy! :)')

In [None]:
#@title Load/Re-load saved model
full_path_to_json_save_file = "/content/Torah-Russian-Model.json" #@param {type:"string"}

print('Loading model from json file...')
# The context manager closes the file handle even if parsing fails; the
# previous bare open() left it open.
with open(full_path_to_json_save_file) as f:
    model_json = json.load(f)

print('Restoring the model...')
# NOTE(review): the training cell builds a markovify.NewlineText, while the
# model is restored here as markovify.Text — confirm this is intended.
markov_text_model = markovify.Text.from_json(model_json)

print('Model loaded and restored! Enjoy! :)')

# Generate Text

In [None]:
#@title Generate Text

#@markdown NOTE: If nothing is being generated after 10+ attempts, try again with different model state # and generation settings

minimum_number_of_characters_to_generate = 100 #@param {type:"slider", min:100, max:1500, step:100}
number_of_cycles_to_try_to_generate_desired_result = 10000 #@param {type:"slider", min:10, max:10000, step:10}
minimum_notes_to_generate = 10 #@param {type:"slider", min:10, max:1000, step:10}
overlap_ratio = 0.8 #@param {type:"slider", min:0.1, max:1, step:0.05}
print_generated_song = True #@param {type:"boolean"}

Output_TXT_String = ''

attempt = 0

print('Generating text. Please wait...')

# The loop retries until the output has enough space-separated tokens
# (token count minus the first token, minus 2) or until 6 attempts were made.
# NOTE(review): the "notes" wording and the `-2` offset look carried over from
# a MIDI-text workflow — confirm they are intended for plain text.
while (len(Output_TXT_String.split(' ')[1:])-2) < minimum_notes_to_generate:
  # NOTE(review): in upstream markovify, min_chars is a make_short_sentence
  # parameter; confirm the bundled module honours it in make_sentence.
  out = markov_text_model.make_sentence(min_chars=minimum_number_of_characters_to_generate, 
                              tries=number_of_cycles_to_try_to_generate_desired_result,
                              max_overlap_ratio=overlap_ratio)

  # make_sentence returns None when no acceptable sentence was produced within
  # `tries`; the previous ''.join(out) raised TypeError in that case. A failed
  # attempt now simply keeps the last successful output (if any).
  if out is not None:
    Output_TXT_String = out
  print('Attempt #', attempt)
  attempt += 1
  if attempt > 5:
    break

if Output_TXT_String:
  print('Generation complete!')
  print('=' * 70)
  print(Output_TXT_String.split(' ')[0], 'with', len(Output_TXT_String.split(' ')[1:])-2, 'notes.')
  print('=' * 70)

  if print_generated_song:
    pprint(Output_TXT_String)
    print('=' * 70)
else:
  # Every attempt returned None: report it instead of printing an empty result.
  print('Nothing was generated. Please try again with different model state # and generation settings.')