In [3]:
import pandas as pd

# Load the raw survey export. Missing cells become empty strings so that the
# `+` concatenations below contribute nothing for an unanswered column.
df = pd.read_csv('response.csv').fillna('')

# Source columns are addressed by position, so this cell depends on the CSV's
# column order. Presumably each row fills in only one of the columns being
# concatenated -- TODO confirm against the CSV header.
free_text = df.iloc[:, 3] + df.iloc[:, 5] + df.iloc[:, 6]
df['response'] = free_text
df['issue'] = df.iloc[:, 1]
symptom_text = df.iloc[:, 2] + df.iloc[:, 4]
df['symptom'] = symptom_text

# The three derived columns were appended last, so they are the final three.
subset = df.iloc[:, -3:]
subset

Unnamed: 0,response,issue,symptom
0,my brakes make a noise whenever I try to stop,Brakes,Car makes grinding noise
1,super frustrating every time I start my car it...,Starter,Car starts then stops
2,I can't open the damn door to my car,Other,
3,I turn the key and nothing happens,Starter,Car doesn't start
4,Car doesn't always start when it's low on blin...,Starter,Car doesn't start
5,My car makes a weird whirring sound every time...,Starter,Car makes an odd noise
6,I keep driving past my driveway because the ca...,Brakes,Car doesn't stop in timely manner
7,I feel a bit of rumbling to the car and a weir...,Brakes,Car makes grinding noise
8,I can hear the starter firing but the car does...,Starter,Car doesn't start
9,Car does not start. It makes a sputtering nois...,Starter,Car doesn't start


In [4]:
import markovify
import codecs

In [5]:
def train_markov_type(data, issue):
    """Build a markovify text model from the responses filed under one issue.

    Args:
        data: DataFrame with an ``issue`` column and a ``response`` column.
        issue: Value of ``issue`` selecting the rows to train on.

    Returns:
        A ``markovify.Text`` model with ``state_size=2`` that does not retain
        the original corpus (``retain_original=False``).
    """
    rows_for_issue = data["issue"] == issue
    corpus = data[rows_for_issue].response
    return markovify.Text(corpus, state_size=2, retain_original=False)

def make_sentence(model, length=200):
    """Ask ``model`` for one short generated sentence.

    Args:
        model: Object exposing ``make_short_sentence`` (here, a markovify model).
        length: Maximum sentence length, passed as the first positional argument.

    Returns:
        Whatever ``model.make_short_sentence`` returns.

    NOTE(review): the overlap limits below are markovify's guard against
    reproducing the training text. It looks like that check is skipped for
    models built with ``retain_original=False`` -- confirm against markovify.
    """
    overlap_limits = {"max_overlap_ratio": .7, "max_overlap_total": 15}
    return model.make_short_sentence(length, **overlap_limits)

# One model per issue category, trained on that category's responses only.
other_model, brakes_model, starter_model = [
    train_markov_type(subset, issue_label)
    for issue_label in ("Other", "Brakes", "Starter")
]

In [6]:
# Sample one generated sentence from the "Other" model.
make_sentence(other_model)

"My car keeps reporting that my driver's side front tire has no tire pressure, but when I bought the car."

In [7]:
# Sample one generated sentence from the "Brakes" model.
make_sentence(brakes_model)

'I keep driving past my driveway because the car refuses to stop and sometimes very bumpy.'

In [8]:
# Sample one generated sentence from the "Starter" model.
make_sentence(starter_model)

'Cannot get the car to turn over.'

We can also blend the per-issue models into a single model with `markovify.combine`, giving each one a relative weight.

In [9]:
# Blend the three per-issue models; weights are relative (14 : 7 : 7).
blend_models = [other_model, brakes_model, starter_model]
blend_weights = [14, 7, 7]
compound_model = markovify.combine(blend_models, blend_weights)

In [10]:
# Print 20 sentences sampled from the blended model.
for i in range(20):
    sample = make_sentence(compound_model)
    print(sample)

I have a problem with my blinker fluid.
The pedal feels squishy.
The windshield has a lot noise and does not stop
It is very unsafe.
My battery keeps dying.
Car creates whistle sound each time I try to set it at anything other than MAX COOL, it defaults to full blast heat or full blast heat even if the door won't open.
Blind Spot Monitoring system will randomly be disabled.
I can't put gas in my car but it kind of won't stop either.
I have to stretch my leg to do it.
When I drive, I feel a bit of rumbling to the air bag or seat belt.
Squealing sound
Usefulness is vanishing...
Rear right tail light is out.
Brake pedal engagement varies, sometimes hard sometimes soft
My tire keeps going flat.
I'm on the menu to return to my previous display which is very slow to get moving and there's a horrible grinding noise as I drive.
Maybe the brakes disengage.
The car is leaning on left
With that, highway driving is very unpleasant.
It's not actually a grinding noise coming from the front of the ve

In [11]:
import numpy
def generate_cases(models, weights=None):
    """Yield an endless stream of sentences, picking a model at random each time.

    Unlike a combined model, every sentence comes from exactly one of
    ``models``. Model ``i`` is picked with probability
    ``weights[i] / sum(weights)``.

    Args:
        models: Sequence of models accepted by ``make_sentence``.
        weights: Optional sequence of non-negative relative weights, one per
            model. Defaults to equal weights.

    Yields:
        The result of ``make_sentence`` for the model chosen on that draw.

    Raises:
        ValueError: If ``models`` is empty, ``weights`` has a different length
            than ``models``, any weight is negative, or all weights are zero.
            This is a generator, so the error surfaces on the first ``next()``.
    """
    models = list(models)
    if weights is None:
        weights = [1] * len(models)
    else:
        weights = list(weights)

    if not models:
        raise ValueError("generate_cases needs at least one model")
    if len(weights) != len(models):
        raise ValueError(
            "expected one weight per model, got %d weights for %d models"
            % (len(weights), len(models))
        )
    if any(w < 0 for w in weights):
        raise ValueError("weights must be non-negative")

    total_weight = float(sum(weights))
    if total_weight <= 0:
        raise ValueError("at least one weight must be positive")

    # Cumulative probability thresholds, built in a single pass.
    thresholds = []
    running = 0
    for w in weights:
        running += w
        thresholds.append(running / total_weight)

    def choose_model():
        # Pick the first model whose cumulative threshold covers the draw.
        r = numpy.random.uniform()
        for p, m in zip(thresholds, models):
            if r <= p:
                return m
        # Fallback in case the draw exceeds every threshold.
        return models[-1]

    while True:
        yield make_sentence(choose_model())
            

In [12]:
# Endless sentence generator that picks one per-issue model per draw, with relative weights 3 : 4 : 4.
t = generate_cases([other_model, brakes_model, starter_model], [3,4,4])

In [13]:
# Pull 100 generated sentences; this advances the shared generator `t`.
[next(t) for i in range(100)]

["It makes a grinding sound for a few seconds and then won't start again.",
 'I try to start the engine doesn’t power up',
 'It is more squealing or screeching sounds than grinding.',
 'I turn the key and nothing happens',
 "The rotors are warped so it's difficult to push the break pedal without lurching.",
 'It is more squealing or screeching sounds than grinding.',
 'Leak of a clear, cloudy, oil like substance in the car.',
 "I don't get it.",
 'Just that odd clicking noise....',
 'I was working on the road',
 'https://www.youtube.com/watch?v=bEME9licodY',
 'It seems like I have to stretch my leg to do it.',
 'Car creates whistle sound each time I start my car starts.',
 'Then I have to stretch my leg to do it.',
 'With that, highway driving is very dangerous',
 "Starter makes clicking noise, won't start again.",
 "It's rather a periodic deep sound when breaking.",
 "When I'm taking a left turn, the steering wheel shudders and I can't open the damn door to my previous display which i

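Checking how similar the generated sentences are to the original responses (slow: every generated sentence is compared against every original response)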

In [15]:
!pip install -U pip setuptools wheel
!pip install -U spacy

Collecting setuptools
  Downloading setuptools-54.1.2-py3-none-any.whl (785 kB)
[K     |████████████████████████████████| 785 kB 32.4 MB/s eta 0:00:01
Collecting wheel
  Downloading wheel-0.36.2-py2.py3-none-any.whl (35 kB)
Installing collected packages: wheel, setuptools
  Attempting uninstall: wheel
    Found existing installation: wheel 0.35.1
    Uninstalling wheel-0.35.1:
      Successfully uninstalled wheel-0.35.1
  Attempting uninstall: setuptools
    Found existing installation: setuptools 53.0.0
    Uninstalling setuptools-53.0.0:
      Successfully uninstalled setuptools-53.0.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thoth-python 0.11.0 requires beautifulsoup4==4.6.3, but you have beautifulsoup4 4.8.2 which is incompatible.
tensorflow 2.3.1 requires numpy<1.19.0,>=1.16.0, but you have numpy 1.19.2 which is incompatible.
tensorflow 2.3.1 

In [16]:
import spacy
nlp = spacy.load('en_core_web_sm')

dt_b = subset["response"]
dt_a = [next(t) for i in range(100)]
# Drop failed generations so nlp() is never handed None.
# NOTE(review): markovify is documented to return None when it cannot build a sentence -- confirm.
dt_a = [sentence for sentence in dt_a if sentence is not None]

import numpy as np

# Parse each original response once, rather than once per generated sentence.
reference_docs = [nlp(sentence1) for sentence1 in dt_b]

# For each generated sentence, record its highest similarity to any original
# response (floored at 0, matching the initial value of the running maximum).
a = []
for sentence in dt_a:
    doc = nlp(sentence)
    a.append(max([0] + [doc.similarity(doc1) for doc1 in reference_docs]))

print("Mean similarity: " + str(np.array(a).mean()))

import seaborn as sns
sns.displot(a)

OSError: Can't find spaCy data path: None

In [256]:
import cProfile

def timing(c):
    """Advance the module-level generator ``t`` by ``c`` sentences, discarding the results."""
    for _ in range(c):
        next(t)

# Profile 2000 draws and write the raw profile data to the file 'generatestats'.
cProfile.run('timing(2000)', 'generatestats')

In [257]:
import pstats
# Load the saved profile and print it ordered by standard name.
# 'stdname' is the named equivalent of the legacy numeric sort key -1.
p = pstats.Stats('generatestats')
p.strip_dirs()
p.sort_stats('stdname')
p.print_stats()

Tue Mar  9 11:33:38 2021    generatestats

         303366 function calls (278038 primitive calls) in 0.158 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     2000    0.002    0.000    0.146    0.000 <ipython-input-246-3e3ab942a540>:4(make_sentence)
     2000    0.002    0.000    0.009    0.000 <ipython-input-252-5f1382f3447f>:13(choose_model)
     2000    0.001    0.000    0.157    0.000 <ipython-input-252-5f1382f3447f>:2(generate_cases)
        1    0.001    0.001    0.158    0.158 <ipython-input-256-801ca619797e>:3(timing)
        1    0.000    0.000    0.158    0.158 <string>:1(<module>)
    64057    0.017    0.000    0.024    0.000 chain.py:10(accumulate)
    27366    0.019    0.000    0.125    0.000 chain.py:112(gen)
     2038    0.005    0.000    0.130    0.000 chain.py:125(walk)
    27366    0.069    0.000    0.106    0.000 chain.py:96(move)
     2038    0.001    0.000    0.002    0.000 text.py:120(word_join)
    

<pstats.Stats at 0x1322e3190>