In [3]:
import spacy
import nltk
from nltk.parse.dependency import DependencyGraph
from sklearn.linear_model import LogisticRegression

def get_pos_tags(sentence):
    tokens = nltk.word_tokenize(sentence)
    pos_tags = nltk.pos_tag(tokens)
    return pos_tags

def visualize_dependency(sentence):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(sentence)
    
    # Visualize the dependency tree
    from spacy import displacy
    displacy.render(doc, style="dep", jupyter=True)

def dependency_parse(sentence):
    # Bottom-up dependency parsing
    pos_tags = get_pos_tags(sentence)
    dg = DependencyGraph(pos_tags)
    bottom_up_tree = dg.triples()

    # Top-down dependency parsing
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(sentence)
    top_down_tree = [(token.text, token.head.text, token.dep_) for token in doc]

    return bottom_up_tree, top_down_tree

def predict_grammatical_correctness(sentence):
    # Preprocess the input sentence
    tokens = nltk.word_tokenize(sentence)
    features = [len(tokens), len(set(tokens)), sum(len(word) for word in tokens)]

    # Train a logistic regression model
    X_train = [[5, 5, 20], [10, 8, 50], [3, 3, 15], [7, 6, 35]]  # Example training data
    y_train = [1, 1, 0, 1]  # Example labels (1 for grammatically correct, 0 for incorrect)
    model = LogisticRegression()
    model.fit(X_train, y_train)

    # Make a prediction on the input sentence
    X_test = [features]
    prediction = model.predict(X_test)[0]

    return prediction

# Example usage
input_sentence = "The quick brown fox jumps over the lazy dog."

# POS tagging
pos_tags = get_pos_tags(input_sentence)
print("POS Tags:", pos_tags)

# Dependency parsing
bottom_up_tree, top_down_tree = dependency_parse(input_sentence)
print("Bottom-up Dependency Tree:", bottom_up_tree)
print("Top-down Dependency Tree:", top_down_tree)
visualize_dependency(input_sentence)

# Grammatical correctness prediction
grammatical_correctness = predict_grammatical_correctness(input_sentence)
print("Grammatical Correctness:", "Correct" if grammatical_correctness == 1 else "Incorrect")

ModuleNotFoundError: No module named 'nltk.parse.dependency'

In [2]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\anjan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\anjan\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [5]:
import spacy
from sklearn.linear_model import LogisticRegression

def get_pos_tags(sentence):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(sentence)
    pos_tags = [(token.text, token.pos_) for token in doc]
    return pos_tags

def visualize_dependency(sentence):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(sentence)
    
    # Visualize the dependency tree
    from spacy import displacy
    displacy.render(doc, style="dep", jupyter=True)

def dependency_parse(sentence):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(sentence)
    dependency_tree = [(token.text, token.head.text, token.dep_) for token in doc]
    return dependency_tree

def predict_grammatical_correctness(sentence):
    # Preprocess the input sentence
    tokens = sentence.split()
    features = [len(tokens), len(set(tokens)), sum(len(word) for word in tokens)]

    # Train a logistic regression model
    X_train = [[5, 5, 20], [10, 8, 50], [3, 3, 15], [7, 6, 35]]  # Example training data
    y_train = [1, 1, 0, 1]  # Example labels (1 for grammatically correct, 0 for incorrect)
    model = LogisticRegression()
    model.fit(X_train, y_train)

    # Make a prediction on the input sentence
    X_test = [features]
    prediction = model.predict(X_test)[0]

    return prediction

# Example usage
input_sentence = "The quick brown fox jumps it on the lazy dog."

# POS tagging
pos_tags = get_pos_tags(input_sentence)
print("POS Tags:", pos_tags)

# Dependency parsing
dependency_tree = dependency_parse(input_sentence)
print("Dependency Tree:", dependency_tree)
visualize_dependency(input_sentence)

# Grammatical correctness prediction
grammatical_correctness = predict_grammatical_correctness(input_sentence)
print("Grammatical Correctness:", "Correct" if grammatical_correctness == 1 else "Incorrect")

POS Tags: [('The', 'DET'), ('quick', 'ADJ'), ('brown', 'ADJ'), ('fox', 'NOUN'), ('jumps', 'VERB'), ('it', 'PRON'), ('on', 'ADP'), ('the', 'DET'), ('lazy', 'ADJ'), ('dog', 'NOUN'), ('.', 'PUNCT')]
Dependency Tree: [('The', 'fox', 'det'), ('quick', 'fox', 'amod'), ('brown', 'fox', 'amod'), ('fox', 'jumps', 'nsubj'), ('jumps', 'jumps', 'ROOT'), ('it', 'jumps', 'dobj'), ('on', 'jumps', 'prep'), ('the', 'dog', 'det'), ('lazy', 'dog', 'amod'), ('dog', 'on', 'pobj'), ('.', 'jumps', 'punct')]


Grammatical Correctness: Correct


In [7]:
! pip install transformers

Collecting transformers
  Downloading transformers-4.39.3-py3-none-any.whl.metadata (134 kB)
     ---------------------------------------- 0.0/134.8 kB ? eta -:--:--
     --- ------------------------------------ 10.2/134.8 kB ? eta -:--:--
     -------------------------------------- 134.8/134.8 kB 2.0 MB/s eta 0:00:00
Collecting tokenizers<0.19,>=0.14 (from transformers)
  Downloading tokenizers-0.15.2-cp311-none-win_amd64.whl.metadata (6.8 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.4.3-cp311-none-win_amd64.whl.metadata (3.9 kB)
Downloading transformers-4.39.3-py3-none-any.whl (8.8 MB)
   ---------------------------------------- 0.0/8.8 MB ? eta -:--:--
    --------------------------------------- 0.1/8.8 MB 3.3 MB/s eta 0:00:03
   - -------------------------------------- 0.2/8.8 MB 3.5 MB/s eta 0:00:03
   - -------------------------------------- 0.4/8.8 MB 3.5 MB/s eta 0:00:03
   -- ------------------------------------- 0.6/8.8 MB 3.9 MB/s eta 0:0

In [8]:
import spacy
from transformers import pipeline

def get_pos_tags(sentence):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(sentence)
    pos_tags = [(token.text, token.pos_) for token in doc]
    return pos_tags

def dependency_parse(sentence):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(sentence)
    dependency_tree = [(token.text, token.head.text, token.dep_) for token in doc]
    return dependency_tree

def predict_grammatical_correctness(sentence):
    # Load the pre-trained language model
    grammaticality_classifier = pipeline("text-classification", model="distilbert-base-uncased-finetuned-grammar")

    # Make a prediction using the language model
    result = grammaticality_classifier(sentence)
    grammatical_score = result[0]["score"]

    # Use a logistic regression classifier to refine the prediction
    # (assuming you have trained the classifier on a labeled dataset)
    tokens = sentence.split()
    features = [len(tokens), len(set(tokens)), sum(len(word) for word in tokens)]
    X_train = [[5, 5, 20], [10, 8, 50], [3, 3, 15], [7, 6, 35]]  # Example training data
    y_train = [1, 1, 0, 1]  # Example labels (1 for grammatically correct, 0 for incorrect)
    model = LogisticRegression()
    model.fit(X_train, y_train)
    prediction = model.predict([features])[0]

    # Combine the language model and logistic regression predictions
    if grammatical_score > 0.5 and prediction == 1:
        return "Correct"
    else:
        return "Incorrect"

# Example usage
input_sentence = "The quick brown fox jumps over the lazy dog."

# POS tagging
pos_tags = get_pos_tags(input_sentence)
print("POS Tags:", pos_tags)

# Dependency parsing
dependency_tree = dependency_parse(input_sentence)
print("Dependency Tree:", dependency_tree)

# Grammatical correctness prediction
grammatical_correctness = predict_grammatical_correctness(input_sentence)
print("Grammatical Correctness:", grammatical_correctness)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

POS Tags: [('The', 'DET'), ('quick', 'ADJ'), ('brown', 'ADJ'), ('fox', 'NOUN'), ('jumps', 'VERB'), ('over', 'ADP'), ('the', 'DET'), ('lazy', 'ADJ'), ('dog', 'NOUN'), ('.', 'PUNCT')]
Dependency Tree: [('The', 'fox', 'det'), ('quick', 'fox', 'amod'), ('brown', 'fox', 'amod'), ('fox', 'jumps', 'nsubj'), ('jumps', 'jumps', 'ROOT'), ('over', 'jumps', 'prep'), ('the', 'dog', 'det'), ('lazy', 'dog', 'amod'), ('dog', 'over', 'pobj'), ('.', 'jumps', 'punct')]


OSError: distilbert-base-uncased-finetuned-grammar is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`