In [1]:
import json
import re
from datetime import datetime
import random
import nltk
import pickle
from nameparser.parser import HumanName
from nltk.corpus import wordnet
# import names
import gender_guesser.detector as gender
import calendar
import names

In [2]:
# Load the consolidated dataset (a JSON document) from disk.
with open("data/consolidatedData.json", 'r', encoding='utf-8') as f:
    consolidatedData = json.load(f)

# Pick one random entry of the 'allPrunedParaComplete' list to inspect below.
p = random.choice(consolidatedData['allPrunedParaComplete'])

In [3]:
# Display the randomly chosen entry.
p

["[BAND_NAME] is an [GENRE] band, created by singer-songwriter-composer-producer-actor  Kirpatrick Thomas. Founded in [YEAR], the band originated in Newark, Delaware along with such other local bands of the period including Jake and the Stiffs, The Verge, [PERSON_NAME_FULL_3_MALE], Zen Guerilla and Smashing Orange. Heavily influenced by The Doors, [PERSON_NAME_FULL_2_FEMALE], Hawkwind, [PERSON_NAME_LAST_4] Haack, and Chrome, [BAND_NAME]'s early stages were experimental and differed greatly from their present sound though the band's musical style is ever in a period of flux.\n",
 'In the summer of [YEAR], band members Kirpatrick Thomas, [PERSON_NAME_FULL_1_MALE] and [PERSON_NAME_FULL_0_MALE] re-located to Los Angeles. The band re-formed to include Bobby Bones, [PERSON_NAME_FULL_6_MALE], Frankie "Teardrop" Emerson and Rob Campanella of The Brian Jonestown Massacre and Jason “Plucky” Anchondo of The Warlocks. Inspired by their new locale, the band began a [GENRE]',
 'In [YEAR], Kirpatrick

## Replace Band Name

In [4]:
# SECURITY NOTE: pickle.load can execute arbitrary code while deserialising;
# only run this cell on a 'titles' file that comes from a trusted source.
with open('titles', 'rb') as inf:
    titles = pickle.load(inf)

# Word pools used later to assemble random band names, bucketed by POS tag.
determiners, nouns, adjectives = [], [], []
for title in titles:
    # Tag the lower-cased title; each item is a (word, part-of-speech) pair.
    for word, pos in nltk.pos_tag(nltk.word_tokenize(title.lower())):
        if pos == 'DT':
            # Determiners stay lower-case here; they are capitalised on use.
            determiners.append(word)
        elif pos in ('NN', 'NNS'):
            nouns.append(word.capitalize())
        elif pos == 'JJ':
            adjectives.append(word.capitalize())

In [5]:
def replace_band_name(text):
    """Replace every ``[BAND_NAME]`` placeholder in ``text`` with one random name.

    The name is assembled from the notebook-level word lists ``determiners``,
    ``adjectives`` and ``nouns`` in one of three equally likely shapes:

    * ``<Determiner> <Adjective> <Noun>``
    * ``<Determiner> <Adjective> <Adjective> <Noun>``
    * ``<Determiner> <Noun> and <Determiner> <Noun>`` (same determiner twice)

    A single name is generated per call, so all occurrences in ``text`` receive
    the same band name.

    Args:
        text: String that may contain ``[BAND_NAME]`` placeholders.

    Returns:
        ``text`` with each placeholder replaced by the generated name.

    Raises:
        IndexError: if ``determiners`` is empty.
        ValueError: if ``adjectives`` or ``nouns`` has fewer than two entries.
    """
    def getRandName():
        determiner = random.choice(determiners).capitalize()
        adjective1, adjective2 = random.sample(adjectives, 2)
        noun1, noun2 = random.sample(nouns, 2)

        title_format = random.randrange(3)
        if title_format == 0:
            words = [determiner, adjective1, noun1]
        elif title_format == 1:
            words = [determiner, adjective1, adjective2, noun1]
        else:
            words = [determiner, noun1, 'and', determiner, noun2]
        return ' '.join(words)

    new_name = getRandName()
    # Literal replacement: the placeholder is a fixed string, and going through
    # re.sub would treat backslashes in the generated name as escape sequences.
    return text.replace('[BAND_NAME]', new_name)

## Replace Years

In [6]:
def replace_years(text):
    """Replace each ``[YEAR]`` placeholder with a plausible, non-decreasing year.

    The first placeholder becomes ``current_year - 5 * n`` (``n`` being the
    number of placeholders); every following one is the previous year plus a
    random step of 0 to 5 years. Because each step is at most 5, no generated
    year is later than the current year.

    Args:
        text: String that may contain ``[YEAR]`` placeholders.

    Returns:
        ``text`` with the placeholders replaced, in order of appearance, by the
        generated years. Text without placeholders is returned unchanged.
    """
    num_years = text.count('[YEAR]')

    year = datetime.now().year - (5 * num_years)
    years = [year]
    for _ in range(1, num_years):
        year += random.randint(0, 5)
        years.append(year)

    # Hand out the prepared years one per match, left to right.
    remaining = iter(years)
    return re.sub(r"\[YEAR\]", lambda _match: str(next(remaining)), text)

## Replace Months

In [7]:
def replace_months(text):
    """Replace each ``[MONTH]`` placeholder with a random month name.

    Every occurrence is drawn independently from the twelve full month names
    plus the twelve abbreviations provided by the ``calendar`` module.

    Args:
        text: String that may contain ``[MONTH]`` placeholders.

    Returns:
        ``text`` with each placeholder replaced by a month name or abbreviation.
    """
    # Index 0 of month_name / month_abbr is an empty string, hence range(1, 13).
    months = [calendar.month_name[i] for i in range(1, 13)]
    months += [calendar.month_abbr[i] for i in range(1, 13)]
    return re.sub(r'\[MONTH\]', lambda _match: random.choice(months), text)

In [8]:
# Quick check: the trailing [MONTH] placeholder should come back as a month name or abbreviation.
replace_months("s replacement later that year.[MONTH]")

's replacement later that year.Dec'

## Replace Names 

In [64]:
def replace_person_names(text):
    """Replace person-name placeholders in ``text`` with generated names.

    Three placeholder forms are recognised (written WITHOUT surrounding
    brackets; any brackets present in ``text`` are left untouched):

    * ``PERSON_NAME_FULL_<n>_MALE`` / ``PERSON_NAME_FULL_<n>_FEMALE`` --
      replaced by ``names.get_full_name(gender=...)``; each distinct
      placeholder gets one name, reused for all of its occurrences.
    * ``PERSON_NAME_FIRST_<n>`` -- replaced by the first word of the full name
      generated for person ``<n>``.
    * ``PERSON_NAME_LAST_<n>`` -- replaced by the last word of the full name
      generated for person ``<n>``.

    When no full-name placeholder exists for person ``<n>``, a first/last name
    is picked at random from a small built-in list; the pick is remembered so
    the same placeholder is replaced consistently throughout ``text``.

    ``<n>`` is matched as a whole decimal number, so person 1 and person 10
    are kept apart.

    Args:
        text: String containing the placeholders described above.

    Returns:
        ``text`` with all recognised placeholders replaced.
    """
    full_pattern = re.compile(r'PERSON_NAME_FULL_(\d+)_(MALE|FEMALE)')
    part_pattern = re.compile(r'PERSON_NAME_(FIRST|LAST)_(\d+)')

    # Generate one full name per distinct FULL placeholder found in `text`
    # (the argument itself, not any notebook-level variable).
    full_name_replacements = {}
    name_by_person = {}
    for match in full_pattern.finditer(text):
        placeholder = match.group(0)
        if placeholder in full_name_replacements:
            continue
        person_num, gender_tag = match.groups()
        full_name = names.get_full_name(gender=gender_tag.lower())
        full_name_replacements[placeholder] = full_name
        name_by_person[person_num] = full_name

    # A function replacement inserts the name literally (no escape processing).
    text = full_pattern.sub(lambda m: full_name_replacements[m.group(0)], text)

    fallback_names = {
        'FIRST': ['James', 'Mani', 'Arman', 'Ishaan'],  # easter egg
        'LAST': ['Ngo', 'Movva', 'Rafian', 'Jain'],  # easter egg
    }
    chosen = {}

    def _name_part(match):
        # Resolve a FIRST/LAST placeholder, caching so repeats stay consistent.
        kind, person_num = match.groups()
        key = (kind, person_num)
        if key not in chosen:
            full_name = name_by_person.get(person_num)
            if full_name:
                words = full_name.split()
                chosen[key] = words[0] if kind == 'FIRST' else words[-1]
            else:
                chosen[key] = random.choice(fallback_names[kind])
        return chosen[key]

    return part_pattern.sub(_name_part, text)

In [None]:
# NOTE(review): this cell has no execution count and reads `name_replacements`
# and `text`, neither of which is assigned at notebook level in the cells
# visible here -- it looks like leftover scratch work; confirm before relying on it.
for k,v in name_replacements.items():
    text = re.sub(k.split()[0], v.split()[0], text) # replace first name
#    text = re.sub(k.split()[1], v.split()[1], text) # replace last name

## Replace Genre

### Get all Genres

In [10]:
# Re-read the consolidated dataset; only its 'allGenres' entry is used here.
with open('data/consolidatedData.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

genres = set()
for genre in data['allGenres']:
    # Non-string entries are skipped.
    if isinstance(genre, str):
        # Keep only the part before the first ',' or '[' and trim whitespace so
        # that e.g. 'Rock' and 'Rock ' collapse into a single entry.
        g = re.split(r',|\[', genre)[0].strip()
        if g:  # an entry starting with ',' or '[' would otherwise add ''
            genres.add(g)

# random.choice needs a sequence; sorting (instead of list(set)) gives a stable
# order, so seeded runs pick the same genre every time.
genres = sorted(genres)

In [11]:
def replace_genre(text):
    """Replace every ``[GENRE]`` placeholder in ``text`` with one random genre.

    A single genre is drawn from the notebook-level ``genres`` list per call,
    so all occurrences in ``text`` receive the same genre.

    Args:
        text: String that may contain ``[GENRE]`` placeholders.

    Returns:
        ``text`` with each placeholder replaced by the chosen genre.

    Raises:
        IndexError: if ``genres`` is empty.
    """
    genre = random.choice(genres)
    # Literal replacement: genre strings come from data and must not be parsed
    # as a regex replacement template (backslashes would be treated as escapes).
    return text.replace('[GENRE]', genre)

## Replace All Placeholders

In [66]:
# End-to-end run of all replacement steps over a single dataset entry.
with open('data/consolidatedData.json', 'r', encoding='utf-8') as inf:
    paras = json.load(inf)

#with open('data/0.json', 'r', encoding='utf-8') as inf:
#    data = json.load(inf)

para = paras['allPrunedParaComplete'][10]
para = ''.join(para)  # the entry is a sequence of strings; merge into one text
print(para)
print("")
# Extra placeholder appended to the text -- presumably to exercise the
# first-name replacement path; verify whether it should stay.
para += '[PERSON_NAME_FIRST_0]'
para = replace_band_name(para)
para = replace_years(para)
para = replace_months(para)
para = replace_genre(para)
# replace_person_names matches the placeholders without brackets, so unwrap
# them first. Only brackets around PERSON_NAME_* placeholders are removed;
# any other brackets in the prose are left alone.
para = re.sub(r'\[(PERSON_NAME_\w+)\]', r'\1', para)
print(replace_person_names(para))

[BAND_NAME] was a [GENRE] band from Austin, Texas.
The seeds of [BAND_NAME] were originally planted deep underground by founding members [PERSON_NAME_FIRST_0] Faust (Lead Vocals, Bass, Guitars) and Buz Zoller (Guitars, Vocals) in [YEAR]. With the addition of [PERSON_NAME_FULL_0_MALE] (Guitars, Vocals) in [YEAR] and [PERSON_NAME_FULL_1_MALE] (Drums, Vocals) in [YEAR], the roots took hold. The unassuming rock band, working in the sweaty club environments of Austin, Texas and nearby cities and states, began its campaign to enter the music world with their independently produced 4-track, cassette only release "Turmoil in the Toybox". 
After being "discovered" (buying and listening to Turmoil) by CMJ (College Media Journal) veteran Scott Byron (A&R guru for BMG label Zoo Entertainment), the band was quickly signed following feverish live performances preceding and during Austin's popular South by Southwest Music Festival. A fast paced [YEAR] resulted in the rapid replacement of local manage