In [12]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Fetch the NLTK data packages this script relies on: the 'punkt' tokenizer
# data and the 'stopwords' corpus. Runs at import time, in this order.
for _resource in ('punkt', 'stopwords'):
    nltk.download(_resource)

class PlagiarismChecker:
    """Score the similarity of two texts with TF-IDF and cosine similarity.

    Instance attributes (set in ``__init__``):
        stop_words: set of English stop words taken from the NLTK corpus.
        vectorizer: a ``TfidfVectorizer`` that is refit from scratch on every
            ``calculate_similarity`` call, so no state carries over between
            comparisons.
    """

    def __init__(self):
        # A set gives O(1) membership tests while filtering tokens.
        self.stop_words = set(stopwords.words('english'))
        self.vectorizer = TfidfVectorizer()

    def preprocess_text(self, text: str) -> str:
        """Normalize *text* for comparison.

        The text is lower-cased and tokenized with ``word_tokenize``. A token
        is kept only if ``str.isalnum()`` is true for the whole token and it
        is not a stop word. Punctuation is therefore removed, and so is any
        token containing a non-alphanumeric character (for example an
        underscore or a hyphen).

        Args:
            text: Raw input text.

        Returns:
            The surviving tokens joined by single spaces, or ``''`` when no
            token survives.
        """
        tokens = word_tokenize(text.lower())
        kept = [
            token for token in tokens
            if token.isalnum() and token not in self.stop_words
        ]
        return ' '.join(kept)

    def calculate_similarity(self, user_text: str, reference_text: str) -> float:
        """Return the similarity of the two texts as a percentage.

        Both texts are preprocessed with ``preprocess_text``, vectorized
        together with TF-IDF, and compared with cosine similarity.

        Args:
            user_text: The text being checked.
            reference_text: The text to compare against.

        Returns:
            The cosine similarity of the two TF-IDF vectors multiplied by
            100. ``0.0`` is returned when either text has no usable terms
            left after preprocessing, since there is nothing to compare.
        """
        user_text = self.preprocess_text(user_text)
        reference_text = self.preprocess_text(reference_text)

        # An empty document has an all-zero vector, so its similarity is 0
        # by definition. When both are empty the vectorizer would instead
        # raise "empty vocabulary", so answer directly without fitting.
        if not user_text or not reference_text:
            return 0.0

        try:
            tfidf_matrix = self.vectorizer.fit_transform(
                [user_text, reference_text]
            )
        except ValueError:
            # Raised by the vectorizer when it ends up with an empty
            # vocabulary, e.g. when only single-character tokens remain
            # (its default token pattern ignores those).
            return 0.0

        cosine_similarities = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])
        # Convert the NumPy scalar so every return path yields a plain float.
        return float(cosine_similarities[0][0] * 100)

def main():
    """Prompt for two texts on stdin and print their similarity percentage.

    The checker is built first, then the user's text and the reference text
    are read in that order, and the score is printed with two decimals.
    """
    checker = PlagiarismChecker()

    submitted = input("Enter your text: ")
    baseline = input("Enter reference text for comparison: ")

    score = checker.calculate_similarity(submitted, baseline)
    print(f"Plagiarism between Context: {score:.2f}%")

# Start the interactive prompt only when this module is executed as the main
# program, not when it is imported by other code.
if __name__ == "__main__":
    main()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\athar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\athar\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Enter your text:   plagiarism_checker
Enter reference text for comparison:   plagiarism_checker = PlagiarismChecker()


Plagiarism between Context: 0.00%
