In [27]:
from __future__ import unicode_literals, print_function

import plac
import random
from pathlib import Path
import spacy
from spacy.util import minibatch, compounding

In [28]:
import json
def convert_dataturks_to_spacy(dataturks_JSON_FilePath):
    try:
        training_data = []
        lines=[]
        with open(dataturks_JSON_FilePath, 'r') as f:
            lines = f.readlines()

        for line in lines:
            data = json.loads(line)
            text = data['content']
            entities = []
            for annotation in data['annotation']:
                #only a single point in text annotation.
                point = annotation['points'][0]
                labels = annotation['label']
                # handle both list of labels or a single label.
                if not isinstance(labels, list):
                    labels = [labels]

                for label in labels:
                    #dataturks indices are both inclusive [start, end] but spacy is not [start, end)
                    entities.append((point['start'], point['end'] + 1 ,label))


            training_data.append((text, {"entities" : entities}))

        return training_data
    except Exception as e:
        logging.exception("Unable to process " + dataturks_JSON_FilePath + "\n" + "error = " + str(e))
        return None

In [29]:
TRAIN_DATA = convert_dataturks_to_spacy("C:/Users/Anbu/Desktop/resp/final30desc.json");

In [30]:
TRAIN_DATA

[('1.\nABOUT:\nHangar India is looking for a talented Data Scientist, who is responsible for the development of digital creative using industry standard technologies and applications. He/she codes the creative within the guidelines of interactive best practices and with emphasis on maintaining aesthetics.\nWHAT YOUï¿½LL DO:\nDeliver impactful analyses of assigned projects with high quality, on-time, and within estimated hours \nReview data for insights, trends, and makes key observations\nSolve problems by thinking through approaches to the data or analysis\nParticipate as a collaborative team member, holding himself/herself and the team accountable for the quality of the output\nWHAT YOUï¿½VE GOT:\n5+ Years of applied analytic experience in a marketing or data driven role, working with marketing database solutions\nSolid understanding of relational & dimensional database models, including ability to break complex processes and methodologies into easy-to-understand explanations for int

In [31]:
model=None
output_dir=Path("C:\\Users\\Anbu\\Desktop\\resp")
n_iter=50

In [32]:
if model is not None:
    nlp = spacy.load(model)  # load existing spaCy model
    print("Loaded model '%s'" % model)
else:
    nlp = spacy.blank("en")  # create blank Language class
    print("Created blank 'en' model")

Created blank 'en' model


In [33]:
if "ner" not in nlp.pipe_names:
    ner = nlp.create_pipe("ner")
    nlp.add_pipe(ner, last=True)
    # otherwise, get it so we can add labels
else:
    ner = nlp.get_pipe("ner")

In [35]:
import tqdm
nlp.vocab.vectors.name = 'spacy_trained_vectors'
for _, annotations in TRAIN_DATA:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
with nlp.disable_pipes(*other_pipes):  # only train NER
    optimizer=nlp.begin_training()
    for itn in range(n_iter):
        random.shuffle(TRAIN_DATA)
        losses = {}
            # batch up the examples using spaCy's minibatch
        batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
        for text, annotations in (TRAIN_DATA):
            nlp.update(
                [text],
                [annotations],
                sgd=optimizer,
                drop=0.35,
                losses=losses
                       )
        print(losses)
      


{'ner': 228.2007167386661}
{'ner': 218.80770673450945}
{'ner': 201.46086339681884}
{'ner': 157.57561630261657}
{'ner': 134.32075051765773}
{'ner': 200.70113242677076}
{'ner': 131.81240396169187}
{'ner': 160.04608435894107}
{'ner': 134.70247963801089}
{'ner': 85.33788671226743}
{'ner': 118.55617733402748}
{'ner': 98.00203176349319}
{'ner': 83.15918017814528}
{'ner': 105.1521067827413}
{'ner': 71.51596529936255}
{'ner': 55.97301921504784}
{'ner': 71.81021871145404}
{'ner': 69.19735871828495}
{'ner': 68.83737079762832}
{'ner': 62.99685307242146}
{'ner': 65.02928421953052}
{'ner': 36.81363454311117}
{'ner': 61.18199341409871}
{'ner': 62.57199275276579}
{'ner': 43.41105791647907}
{'ner': 32.993685412510864}
{'ner': 37.97275124608687}
{'ner': 58.22378561743055}
{'ner': 27.3231391089834}
{'ner': 39.75089669446113}
{'ner': 48.91025416463335}
{'ner': 24.414013535095265}
{'ner': 16.54781921598597}
{'ner': 37.40471476260107}
{'ner': 34.923874100349934}
{'ner': 33.22593534892605}
{'ner': 26.019046

In [37]:
    # save model to output directory
if output_dir is not None:
     output_dir = Path(output_dir)
    if not output_dir.exists():
        output_dir.mkdir()
    nlp.to_disk(output_dir)
    print("Saved model to", output_dir)


Saved model to C:\Users\Anbu\Desktop\resp


In [43]:

wikitext=nlp(" Reference Code- Inf_EXTERNAL_10038992_21Role Designation- Technology AnalystTechnical & Professional requirements- Basic Qualifications- - Experience Range- 3-5 years. At least 2 years of experience and excellent understanding of Machine learning techniques and algorithm such as Neural Networks, Naive Bayes, SVM, Decision Forests, etc.,o NLP, text analytics technologies.,o Common data science toolkits, such as R, Python Data Science Libraries, MatLab, etc. Excellence in at least one of these is highly desirable,o Data visualization tools, such as D3.js, GGplot, etc.,o Query languages such as SQL, Hive.,Good applied statistics skills, such as distributions, statistical testing, regression, etc.,At least 5 years of hands on experience with more than one programming language (Python / Scala/ Java/SQL),Role and responsibilities- - ,You will be responsible for delivering high-value next-generation products on aggressive deadlines and will be required to write high-quality, highly optimized/high-performance and maintainable code that your fellow developers love ,You will be a core member of a team that does whatever it takes to delight customers, take an iterative and result oriented approach to software development. In this position you will provide best-fit architectural solutions for multi-product, multi-project, multi-industry portfolios providing technology consultation and assisting in defining scope and sizing of work ,You will be the anchor in Proof of Concept developments and support opportunity identification and pursuit processes and evangelize Infosys brand ,You will collaborate with some of the best talent in the industry to create and implement innovative high quality solutions, lead and participate in sales and pursuits focused on our clients' business needs ,You will be part of a learning culture, where teamwork and collaboration are encouraged, excellence is rewarded, and diversity is respected and valued ,The role involves high end technology and hence would require you to be proficient in coding as well,Location- Bangalore Job Locations- Bangalore,BangaloreResponsibilites- Ensure effective Design, Development & Validation of activities in line with client needs and architectural requirements.,Ensure continual knowledge management.,Adherence to the organizational guidelines and processesSkills- R, Python, Machine Learning Company Description-Infosys is a leading provider of next generation consulting,technology and outsourcing solutions.We are dedicated to helping organizations,build tomorrows enterprise and advance the way the world works Thats why Forbes ranks us 19th among the top 100 most innovative companies. Our employees partner with clients to transform their business - one conversation; one idea; one insight at a time.While we are at it, some things remain unchanged- the unwavering ethics,transparency and respect behind everything we do. We will always be a company powered by intellect and driven by values.So, if your passion is to build solutions that  really make a difference to enterprises,the community and your world, Infosys is the right place for you.")


In [44]:
nlp3=spacy.load(output_dir)

In [45]:
for word in wikitext.ents:
    print(word.text,word.label_)

3-5 years Experience
2 years Experience
Machine learning Skills
NLP Skills
Python Skills
Data Science Skills
MatLab Skills
Data visualization Skills
5 years Experience
Python Skills
Scala/ Java Skills
Bangalore Location
Bangalore Location
Python Skills
Machine Learning Skills


In [46]:
from spacy import displacy

In [47]:
displacy.render(wikitext, style="ent",jupyter="True")

In [10]:
import pyaudio
p = pyaudio.PyAudio()
for i in range(p.get_device_count()):
    info = p.get_device_info_by_index(i)
    print(info['index'], info['name'])

0 Microsoft Sound Mapper - Input
1 Microphone (Realtek High Defini
2 Microsoft Sound Mapper - Output
3 Speakers / Headphones (Realtek 


In [11]:
import speech_recognition as sr

r = sr.Recognizer()
with sr.Microphone(device_index=1) as source:
    print ('Say Something!')
    audio = r.listen(source)
    print ('Done!')
    
text = r.recognize_google(audio)
print (text)

Say Something!


KeyboardInterrupt: 

In [12]:
import speech_recognition as sr

r = sr.Recognizer()
with sr.Microphone() as source:
    print("Speak Anything :")
    audio = r.listen(source)
    try:
        text = r.recognize_google(audio)
        print("You said : {}".format(text))
    except:
        print("Sorry could not recognize what you said")

Speak Anything :


KeyboardInterrupt: 