In [1]:
# Import character dictionaries, useful to map a character to its data, and a fixed random seed
from Data.data_dicts import character_dict, source_dict, random_state

# Names of the characters handled by this notebook; later cells pick one by position
characters = [
    'Barney',
    'Sheldon',
    'Harry',
    'Fry',
    'Vader',
    'Joey',
    'Phoebe',
    'Bender',
]

In [2]:
# Mount google drive, if in Colaboratory environment
import os

# Probe for Colab by trying to import its package. Only a failed import means
# "not on Colab": a bare `except:` would also swallow KeyboardInterrupt,
# SystemExit and unrelated errors, silently selecting the local branch.
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    from google.colab import drive
    # Mount the user's Drive and point the project at the folder stored there
    drive.mount('/content/drive', force_remount=True)
    base_folder = '/content/drive/My Drive/unibo/NLP_project/BarneyBot'
    # Install the third-party packages through pip, one command at a time
    for pip_command in ("pip install datasets",
                        "pip install transformers",
                        "pip install rouge_score",
                        "pip install -U sentence-transformers"):
        os.system(pip_command)
else:
    # Outside Colab the current working directory is used as the project root
    base_folder = os.getcwd()

# Set cache folder for huggingface locally
os.environ["HF_DATASETS_CACHE"] = os.path.join(base_folder, "cache")

In [3]:
# Imports to handle loading the various tv/series datasets and the creation of the common dataset
import pandas as pd
from tqdm import tqdm
from datasets import load_dataset, DatasetDict

In [4]:
# Function to load the dataset for a given character
def load_df(character):
    """Load a character's CSV file and split it into train/test/val sets.

    The file is read from ``<base_folder>/Data/Characters/<character>/<character>.csv``
    through the HuggingFace ``csv`` loader, using ``<base_folder>/cache`` as cache
    directory. The character folder is created when it does not exist yet.

    Args:
        character: Name of the character; used both as folder name and as the
            CSV file stem.

    Returns:
        A ``DatasetDict`` with the keys ``'train'``, ``'test'`` and ``'val'``.
    """
    # One folder holds everything for the character. The original built this
    # same path twice (as "in" and "out" folder) and a third time for the CSV.
    character_folder = os.path.join(base_folder, 'Data', 'Characters', character)
    os.makedirs(character_folder, exist_ok=True)

    # Get dataset path
    dataset_path = os.path.join(character_folder, character + '.csv')
    # Load HuggingFace dataset (all rows end up in the 'train' split of the result)
    character_hg = load_dataset('csv',
                                data_files=dataset_path,
                                cache_dir=os.path.join(base_folder, "cache"))

    # First hold out 15% of the rows, then cut that held-out part 67% / 33%:
    # overall roughly 85% train / 10% test / 5% validation.
    # The same fixed seed is used for both splits so they are reproducible.
    train_test_hg = character_hg['train'].train_test_split(test_size=0.15, seed=random_state)
    test_val = train_test_hg['test'].train_test_split(test_size=0.33, seed=random_state)

    # Store splits into a HuggingFace dataset and return it
    return DatasetDict({
        'train': train_test_hg['train'],
        'test': test_val['train'],
        'val': test_val['test']
    })

In [5]:
# Start with an empty collection; the cells below add the selected lines to it
lines_selected_list = list()

# Characters

## Barney

In [6]:
# Load the train/test/val splits for the first character in the list (Barney)
temp = load_df(character=characters[0])

Using custom data configuration default-a6ad4235c1f657bf
Reusing dataset csv (C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-a6ad4235c1f657bf\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-a6ad4235c1f657bf\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-42d4a60678e52392.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-a6ad4235c1f657bf\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-9f2121ab0c631844.arrow
Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-a6ad4235c1f657bf\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-0c8b5f20a1ab2b69.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-a6ad4235c1f657bf\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-9a6642094a4b5bc0.arrow


In [7]:
# Display every example of Barney's test split as a sanity check
list(temp['test'])

[{'response': "Daddy's home.",
  'context/0': "I know, it's two years of my life I'm never getting back. A little part of me just wants to jump the bones of the next guy I see.",
  'context/1': "I'm just surprised you didn't dump him sooner.",
  'context/2': 'And then Trudy filled us in.',
  'context/3': 'Trudy. My name is Trudy.',
  'context/4': 'Wait, this is klling me. We have to find out who that girl is.'},
 {'response': "I don't have much time!",
  'context/0': "Wh-Where'd you get a meatball...",
  'context/1': 'Eat this meatball sub.',
  'context/2': 'Yes, of course, of course... anything.',
  'context/3': 'Thank you, Marshall.  Marshall... can I ask one final favor, my friend?',
  'context/4': "We're not going anywhere, buddy. We're gonna stay here right till the end."},
 {'response': "I could tell you knew something was up with me, and you're right. But I can't tell you what it is. I should tell you, but I can't! I have to. I never will! I'm going to. Let's just drop it. What'

In [8]:
# Hand-picked Barney exchanges, each given as [context line, reply, source tag];
# add them to the selected lines and display the collection so far
lines_selected_list.extend([
    ['Barney, this is about the building.',
     'Come on. This is so about the girl.',
     'HIMYM'],
    ["All right. I'll be right there. Stay where you are.",
     'Ted, Ted.',
     'HIMYM'],
    ["I think there's a pretty girl smiling at me there.",
     'Hey, this is a chair, but go ahead and drag it.',
     'HIMYM'],
    ['I love you, man.',
     'Me too, buddy.',
     'HIMYM'],
    ["Not even if she's hot?",
     "Not even if her mom's hot.",
     "HIMYM"],
])
lines_selected_list

[['Barney, this is about the building.',
  'Come on. This is so about the girl.',
  'HIMYM'],
 ["All right. I'll be right there. Stay where you are.", 'Ted, Ted.', 'HIMYM'],
 ["I think there's a pretty girl smiling at me there.",
  'Hey, this is a chair, but go ahead and drag it.',
  'HIMYM'],
 ['I love you, man.', 'Me too, buddy.', 'HIMYM'],
 ["Not even if she's hot?", "Not even if her mom's hot.", 'HIMYM']]

## Sheldon

In [9]:
# Load the train/test/val splits for the second character in the list (Sheldon)
temp = load_df(character=characters[1])

Using custom data configuration default-c73314df1b13186a
Reusing dataset csv (C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-c73314df1b13186a\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-c73314df1b13186a\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-1cb608e470b09cb3.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-c73314df1b13186a\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-67190f9a5dd10a36.arrow
Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-c73314df1b13186a\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-50ba7b187fd844a8.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-c73314df1b13186a\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-8ba2b6ec05e5e394.arrow


In [10]:
# Display every example of Sheldon's test split as a sanity check
list(temp['test'])

[{'response': 'She called me dumbass.',
  'context/0': 'Oh, sweetie, Im sorry.',
  'context/1': 'Leslie Winkle, Penny. She belittles my research.',
  'context/2': 'Sheldon, come back, youre losing me.',
  'context/3': 'Do you know, its amazing how many supervillains have advanced degrees. Graduate school should probably do a better job of screening those people out.',
  'context/4': 'Okay, I get it, I get it, I get it.'},
 {'response': 'Well never know.',
  'context/0': 'Oh. Wow. Good job. Okay, um, can you do this?',
  'context/1': 'I am doing it.',
  'context/2': 'All right, lets start with a toe touch. Okay, you do it.',
  'context/3': 'All right.',
  'context/4': 'Its good to stretch your muscles before you run.'},
 {'response': 'Oh, you shouldnt have.',
  'context/0': 'Thanks. And, Sheldon, I know tonights the night you eat Thai food, so I went to the Asian market, got all the ingredients and made it from scratch.',
  'context/1': 'Ooh, it smells good.',
  'context/2': 'Hi.',
  'c

In [11]:
# Hand-picked Sheldon exchanges, each given as [context line, reply, source tag];
# add them to the selected lines and display the collection so far
lines_selected_list.extend([
    ['Soft kitty, warm kitty Little ball of fur',
     'Happy kitty, slippy kitty Pur pur pur',
     'TBBT'],
    ['Penny.',
     'Thats just wrong.',
     'TBBT'],
    ['Oh. Sheldon, thank you. Thats so romantic. But what about Rajesh? He was okay with you choosing the name?',
     'Well, it took a little negotiating, but I wore him down. Uh, we get the asteroid, and if you and I have children, they all have to be named Rajesh.',
     'TBBT'],
    ['I didnt break it. I, I guess Stuart sold it to me like this.',
     'Yes. Yes, he did, that is a perfectly satisfying and plausible explanation. Yeah, lets all be mad at Stuart.',
     'TBBT'],
    ['Be careful.',
     'If I were not being careful, your telling me to be careful would not make me careful.',
     'TBBT'],
])
lines_selected_list

[['Barney, this is about the building.',
  'Come on. This is so about the girl.',
  'HIMYM'],
 ["All right. I'll be right there. Stay where you are.", 'Ted, Ted.', 'HIMYM'],
 ["I think there's a pretty girl smiling at me there.",
  'Hey, this is a chair, but go ahead and drag it.',
  'HIMYM'],
 ['I love you, man.', 'Me too, buddy.', 'HIMYM'],
 ["Not even if she's hot?", "Not even if her mom's hot.", 'HIMYM'],
 ['Soft kitty, warm kitty Little ball of fur',
  'Happy kitty, slippy kitty Pur pur pur',
  'TBBT'],
 ['Penny.', 'Thats just wrong.', 'TBBT'],
 ['Oh. Sheldon, thank you. Thats so romantic. But what about Rajesh? He was okay with you choosing the name?',
  'Well, it took a little negotiating, but I wore him down. Uh, we get the asteroid, and if you and I have children, they all have to be named Rajesh.',
  'TBBT'],
 ['I didnt break it. I, I guess Stuart sold it to me like this.',
  'Yes. Yes, he did, that is a perfectly satisfying and plausible explanation. Yeah, lets all be mad at

## Harry

In [12]:
# Load the train/test/val splits for the third character in the list (Harry)
temp = load_df(character=characters[2])

Using custom data configuration default-82f37a1aaf6145b7
Reusing dataset csv (C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-82f37a1aaf6145b7\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-82f37a1aaf6145b7\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-363e838cf8c57927.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-82f37a1aaf6145b7\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-395c96d1ffa05f88.arrow
Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-82f37a1aaf6145b7\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-5404ccb9ff06d17b.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-82f37a1aaf6145b7\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-a3852ad9abbfd5bf.arrow


In [13]:
# Display every example of Harry's test split as a sanity check
list(temp['test'])

[{'response': 'Stop, Dobby!',
  'context/0': 'Bad Dobby!',
  'context/1': 'That was an awful thing to say.',
  'context/2': 'No, I havent.',
  'context/3': 'You cant have met many decent wizards then.',
  'context/4': 'Dobby has heard of your greatness, sir, but never has he been asked to sit down by a wizard, like an equal.'},
 {'response': "My dad wasn't a drunk.",
  'context/0': 'What did you say?',
  'context/1': "That's a lie.",
  'context/2': 'And a drunk too, no doubt?',
  'context/3': 'He was unemployed.',
  'context/4': "Nothing. He didn't work."},
 {'response': "Alive, you're free.",
  'context/0': 'Besides, dead, the truth dies with him.',
  'context/1': "I didn't think my dad would have wanted his best friends to become killers.",
  'context/2': "He doesn't deserve it.",
  'context/3': 'That was a noble thing you did back there.',
  'context/4': "It'll be nice to do it again as a free man."},
 {'response': "Don't even think about it.",
  'context/0': "In fact, I'm going to 

In [14]:
# Hand-picked Harry exchanges, each given as [context line, reply, source tag];
# add them to the selected lines and display the collection so far
lines_selected_list.extend([
    ['But why would anyone go near that dog?',
     'The day I was at Gringotts, Hagrid took something out of one of the vaults.',
     'HP'],
    ['Expecto Patronum!',
     'Expecto Patronum!',
     'HP'],
    ['Ron Weasley.',
     "I'm Harry. Harry Potter.",
     'HP'],
    ['I spoke a different language?',
     "But I didn't realize...",
     'HP'],
    ['Harry?',
     'Professor.',
     'HP'],
])
lines_selected_list

[['Barney, this is about the building.',
  'Come on. This is so about the girl.',
  'HIMYM'],
 ["All right. I'll be right there. Stay where you are.", 'Ted, Ted.', 'HIMYM'],
 ["I think there's a pretty girl smiling at me there.",
  'Hey, this is a chair, but go ahead and drag it.',
  'HIMYM'],
 ['I love you, man.', 'Me too, buddy.', 'HIMYM'],
 ["Not even if she's hot?", "Not even if her mom's hot.", 'HIMYM'],
 ['Soft kitty, warm kitty Little ball of fur',
  'Happy kitty, slippy kitty Pur pur pur',
  'TBBT'],
 ['Penny.', 'Thats just wrong.', 'TBBT'],
 ['Oh. Sheldon, thank you. Thats so romantic. But what about Rajesh? He was okay with you choosing the name?',
  'Well, it took a little negotiating, but I wore him down. Uh, we get the asteroid, and if you and I have children, they all have to be named Rajesh.',
  'TBBT'],
 ['I didnt break it. I, I guess Stuart sold it to me like this.',
  'Yes. Yes, he did, that is a perfectly satisfying and plausible explanation. Yeah, lets all be mad at

## Fry

In [15]:
# Load the train/test/val splits for the fourth character in the list (Fry)
temp = load_df(character=characters[3])

Using custom data configuration default-095be2aab8976f7d
Reusing dataset csv (C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-095be2aab8976f7d\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-095be2aab8976f7d\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-05c20a0414aaab49.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-095be2aab8976f7d\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-3c93b3526e468e71.arrow
Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-095be2aab8976f7d\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-dba8888b5ae9634f.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-095be2aab8976f7d\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-2d14009b5605d617.arrow


In [16]:
# Display every example of Fry's test split as a sanity check
list(temp['test'])

[{'response': "If I were in charge I wouldn't treat you like this. You're nothing but a big blowhard.",
  'context/0': "Eh, I've heard better.",
  'context/1': "Fine, I've got a toast. To Captain Bender! He's the best......at being a big jerk and his big ugly face is as dumb as a butt.",
  'context/2': "Thank you steward. Wiggles? Weren't you about to propose a toast to your gallant captain?",
  'context/3': "Would you cram a sock in it Bender? Those aren't even medals! They're bottlecaps and pepperoni slices.",
  'context/4': 'Brilliant!'},
 {'response': 'Evil Knievel could!',
  'context/0': 'Hah! No human could do all that.',
  'context/1': 'And I can deliver them. Billions and billions in one night.',
  'context/2': 'Toys! Toys! Toys!',
  'context/3': 'Now we can make toys again!',
  'context/4': "He's trapped!"},
 {'response': 'I was just working my way towards the medulla oblongata - control centre of the heart and lungs. And if I kill myself, you die with me.',
  'context/0': "Yo

In [17]:
# Hand-picked Fry exchanges, each given as [context line, reply, source tag];
# add them to the selected lines and display the collection so far
lines_selected_list.extend([
    ['OK. First Bender, then Flexo, then Fry.',
     "Wait, let's go by rank.",
     'FTM'],
    ["Just relax, Bender. Tomorrow we'll pry you down, have a nice breakfast and then go hunt down and slaughter that ancient evil.",
     "It'll be a rich, full day.",
     'FTM'],
    ["I'm too scared.",
     'Leela, your scaredness is being transmitted straight to Bender. If you care about Nibbler, stop caring about him!',
     'FTM'],
    ['Dr. Zoidberg? Are you OK?',
     "He's dead.",
     'FTM'],
    ['Fry, thank God we found you.',
     'Leela? What are you guys doing here in the year 4000?',
     'FTM'],
])
lines_selected_list

[['Barney, this is about the building.',
  'Come on. This is so about the girl.',
  'HIMYM'],
 ["All right. I'll be right there. Stay where you are.", 'Ted, Ted.', 'HIMYM'],
 ["I think there's a pretty girl smiling at me there.",
  'Hey, this is a chair, but go ahead and drag it.',
  'HIMYM'],
 ['I love you, man.', 'Me too, buddy.', 'HIMYM'],
 ["Not even if she's hot?", "Not even if her mom's hot.", 'HIMYM'],
 ['Soft kitty, warm kitty Little ball of fur',
  'Happy kitty, slippy kitty Pur pur pur',
  'TBBT'],
 ['Penny.', 'Thats just wrong.', 'TBBT'],
 ['Oh. Sheldon, thank you. Thats so romantic. But what about Rajesh? He was okay with you choosing the name?',
  'Well, it took a little negotiating, but I wore him down. Uh, we get the asteroid, and if you and I have children, they all have to be named Rajesh.',
  'TBBT'],
 ['I didnt break it. I, I guess Stuart sold it to me like this.',
  'Yes. Yes, he did, that is a perfectly satisfying and plausible explanation. Yeah, lets all be mad at

## Vader

In [18]:
# Load the train/test/val splits for the fifth character in the list (Vader)
temp = load_df(character=characters[4])

Using custom data configuration default-8fc66de038de764b
Reusing dataset csv (C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-8fc66de038de764b\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-8fc66de038de764b\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-fdf6a751d9e305ad.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-8fc66de038de764b\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-fd4f962239c0d8a3.arrow
Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-8fc66de038de764b\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-724da1b5b03e5c90.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-8fc66de038de764b\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-6b695a18a6bce864.arrow


In [19]:
# Display every example of Vader's test split as a sanity check
list(temp['test'])

[{'response': 'Give yourself to the dark side. It is the only way you can save your friends. Yes, your',
  'context/0': 'I will not fight you.',
  'context/1': 'You cannot hide forever, Luke.',
  'context/2': 'Throw me another charge.',
  'context/3': 'Yes, sir.',
  'context/4': 'Send three squads to help. Open the back door.'},
 {'response': 'Did you find any droids?',
  'context/0': 'Unlock one-five-seven and nine.Release charges.',
  'context/1': 'They must be trying to return thestolen plans to the princess. Shemay yet be of some use to us.',
  'context/2': 'Yes.',
  'context/3': 'Close all outboard shields! Closeall outboard shields!',
  'context/4': 'Stormtroopers run to their posts.'},
 {'response': 'They must never again leave thiscity.',
  'context/0': 'Lord Vader, what about Leia and theWookiee?',
  'context/1': 'He will not be permanently damaged.',
  'context/2': "He's no good to me dead.",
  'context/3': 'Lord Vader.',
  'context/4': "Stormtroopers?Here?We're indanger.I mu

In [20]:
# Hand-picked Vader exchanges, each given as [context line, reply, source tag];
# add them to the selected lines and display the collection so far.
# NOTE(review): the 'Lord Vader, what about Leia and theWookiee?' pair used to be
# appended twice, which put a duplicated row into the common dataset. It is now
# added once; pick a fifth distinct pair if five entries per source are required.
lines_selected_list.extend([
    ['I will not fight you.',
     'Give yourself to the dark side. It is the only way you can save your friends. Yes, your',
     'SW'],
    ['Lord Vader, what about Leia and theWookiee?',
     'They must never again leave thiscity.',
     'SW'],
    ["The Emperor's coming here?",
     'That is correct, Commander. And heis most displeased with your',
     'SW'],
    ['Shall I hold them?',
     'No. Leave them to me. I will deal',
     'SW'],
])
lines_selected_list

[['Barney, this is about the building.',
  'Come on. This is so about the girl.',
  'HIMYM'],
 ["All right. I'll be right there. Stay where you are.", 'Ted, Ted.', 'HIMYM'],
 ["I think there's a pretty girl smiling at me there.",
  'Hey, this is a chair, but go ahead and drag it.',
  'HIMYM'],
 ['I love you, man.', 'Me too, buddy.', 'HIMYM'],
 ["Not even if she's hot?", "Not even if her mom's hot.", 'HIMYM'],
 ['Soft kitty, warm kitty Little ball of fur',
  'Happy kitty, slippy kitty Pur pur pur',
  'TBBT'],
 ['Penny.', 'Thats just wrong.', 'TBBT'],
 ['Oh. Sheldon, thank you. Thats so romantic. But what about Rajesh? He was okay with you choosing the name?',
  'Well, it took a little negotiating, but I wore him down. Uh, we get the asteroid, and if you and I have children, they all have to be named Rajesh.',
  'TBBT'],
 ['I didnt break it. I, I guess Stuart sold it to me like this.',
  'Yes. Yes, he did, that is a perfectly satisfying and plausible explanation. Yeah, lets all be mad at

## Joey

In [21]:
# Load the train/test/val splits for the sixth character in the list (Joey)
temp = load_df(character=characters[5])

Using custom data configuration default-7f374cd700d2bdae
Reusing dataset csv (C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-7f374cd700d2bdae\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-7f374cd700d2bdae\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-65c86748c5690d66.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-7f374cd700d2bdae\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-a81fcf1f90f0c48c.arrow
Loading cached split indices for dataset at C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-7f374cd700d2bdae\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-bf1e6672a4b7f4b2.arrow and C:\Users\david\Documents\unibo\natural_language_processing\project\BarneyBot\cache\csv\default-7f374cd700d2bdae\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-3ba118b31d22c7a1.arrow


In [22]:
# Display every example of Joey's test split as a sanity check
list(temp['test'])

[{'response': 'Oh, yknow what? Since Im here, I think Im gonna have me a little beer on the port side.',
  'context/0': 'No! No-no, no-no-no, very quiet, said with love, no yelling.',
  'context/1': 'Okay! Okay, youre yelling again! See that?',
  'context/2': 'Huh?',
  'context/3': 'Okay, go to the left.  The left!',
  'context/4': 'I dont know why you just dont say left.'},
 {'response': 'Hey come on now, this is a real date. Uh, sonice place you got here. Foosball, huh? Pizza box. Oh, a subscription to Playboy, my kind of woman.',
  'context/0': 'Oh man! This is so great! I actually feel like Im going on a real date! Although, I have a hint of morning sickness, and Im wearing underwear that goes up to aboutthere.',
  'context/1': 'And, a brownie!  Well, half a brownie. Actually, its just bag. Its been a long walk from the flower shop and I was startin to feel faint so',
  'context/2': 'Ohh, Lilies. Joey, theyre my favorite. Thank you.',
  'context/3': 'No, Im picking you up for our d

In [23]:
# Hand-picked Joey exchanges, each given as [context line, reply, source tag];
# add them to the selected lines and display the collection so far
lines_selected_list.extend([
    ['Oh! Joey uh, were you in our room last night?',
     'No.  I was told the name of the movie would not appear on the bill!',
     'Friends'],
    ['Hey.',
     'Hey-hey-hey! So, how did it go with Dana? Any reason I should leave a block of time open say Thursday?',
     'Friends'],
    ['Joey... are you sure? I mean, I know how much you love him!',
     "Rachel... let's be clear on this, ok? I do not love Hugsy. I like him a normal amount...",
     'Friends'],
    ['Ok, ten.',
     'Okay, Monica picks ten, I call nine! Anyone else?',
     'Friends'],
    ["Joey, Ross is gonna be here any second, would you mind watching Ben for me while I use the ladies' room?",
     'Oh yeah, no problem.',
     'Friends'],
])
lines_selected_list

[['Barney, this is about the building.',
  'Come on. This is so about the girl.',
  'HIMYM'],
 ["All right. I'll be right there. Stay where you are.", 'Ted, Ted.', 'HIMYM'],
 ["I think there's a pretty girl smiling at me there.",
  'Hey, this is a chair, but go ahead and drag it.',
  'HIMYM'],
 ['I love you, man.', 'Me too, buddy.', 'HIMYM'],
 ["Not even if she's hot?", "Not even if her mom's hot.", 'HIMYM'],
 ['Soft kitty, warm kitty Little ball of fur',
  'Happy kitty, slippy kitty Pur pur pur',
  'TBBT'],
 ['Penny.', 'Thats just wrong.', 'TBBT'],
 ['Oh. Sheldon, thank you. Thats so romantic. But what about Rajesh? He was okay with you choosing the name?',
  'Well, it took a little negotiating, but I wore him down. Uh, we get the asteroid, and if you and I have children, they all have to be named Rajesh.',
  'TBBT'],
 ['I didnt break it. I, I guess Stuart sold it to me like this.',
  'Yes. Yes, he did, that is a perfectly satisfying and plausible explanation. Yeah, lets all be mad at

## Standard data

In [24]:
# Generic question/answer pairs not tied to any show, tagged 'Standard';
# add them to the selected lines and display the collection so far
lines_selected_list.extend([
    ['What are you doing for a living?',
     'I am a lawyer.',
     'Standard'],
    ['How are you doing?',
     'Good.',
     'Standard'],
    ['Where are you going to?',
     'I am going out.',
     'Standard'],
    ['What are you wearing?',
     'A T-shirt.',
     'Standard'],
    ['What do you want to do tonight?',
     'Watching tv.',
     'Standard'],
])
lines_selected_list

[['Barney, this is about the building.',
  'Come on. This is so about the girl.',
  'HIMYM'],
 ["All right. I'll be right there. Stay where you are.", 'Ted, Ted.', 'HIMYM'],
 ["I think there's a pretty girl smiling at me there.",
  'Hey, this is a chair, but go ahead and drag it.',
  'HIMYM'],
 ['I love you, man.', 'Me too, buddy.', 'HIMYM'],
 ["Not even if she's hot?", "Not even if her mom's hot.", 'HIMYM'],
 ['Soft kitty, warm kitty Little ball of fur',
  'Happy kitty, slippy kitty Pur pur pur',
  'TBBT'],
 ['Penny.', 'Thats just wrong.', 'TBBT'],
 ['Oh. Sheldon, thank you. Thats so romantic. But what about Rajesh? He was okay with you choosing the name?',
  'Well, it took a little negotiating, but I wore him down. Uh, we get the asteroid, and if you and I have children, they all have to be named Rajesh.',
  'TBBT'],
 ['I didnt break it. I, I guess Stuart sold it to me like this.',
  'Yes. Yes, he did, that is a perfectly satisfying and plausible explanation. Yeah, lets all be mad at

# Save the csv

In [25]:
# Turn the selected lines into a table: each entry supplies context/0, label and
# source in that order, and the columns are then rearranged to put the label first
df = pd.DataFrame(lines_selected_list, columns=['context/0', 'label', 'source'])
df = df.loc[:, ['label', 'context/0', 'source']]
df

Unnamed: 0,label,context/0,source
0,Come on. This is so about the girl.,"Barney, this is about the building.",HIMYM
1,"Ted, Ted.",All right. I'll be right there. Stay where you...,HIMYM
2,"Hey, this is a chair, but go ahead and drag it.",I think there's a pretty girl smiling at me th...,HIMYM
3,"Me too, buddy.","I love you, man.",HIMYM
4,Not even if her mom's hot.,Not even if she's hot?,HIMYM
5,"Happy kitty, slippy kitty Pur pur pur","Soft kitty, warm kitty Little ball of fur",TBBT
6,Thats just wrong.,Penny.,TBBT
7,"Well, it took a little negotiating, but I wore...","Oh. Sheldon, thank you. Thats so romantic. But...",TBBT
8,"Yes. Yes, he did, that is a perfectly satisfyi...","I didnt break it. I, I guess Stuart sold it to...",TBBT
9,"If I were not being careful, your telling me t...",Be careful.,TBBT


In [26]:
# Write the table as CSV, without the index column, into the project's Data folder
out_file = os.path.join(os.path.join(base_folder, 'Data'), 'common_dataset.csv')
df.to_csv(path_or_buf=out_file, index=False)