In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from stempel import StempelStemmer

In [2]:
# Load the offence table; later cells read its 'crime', 'article_number' and 'penalty' columns.
dataset = pd.read_csv('criminal.csv')

In [3]:
def transform_text(text, stemmer=None):
    """Stem every whitespace-separated token of ``text`` and re-join with spaces.

    Parameters
    ----------
    text : str
        Raw text; it is split on runs of whitespace.
    stemmer : object, optional
        Any object exposing ``stem(word)``. When omitted, a single
        ``StempelStemmer.default()`` instance is created on first use and
        reused by later calls instead of being rebuilt for every text.

    Returns
    -------
    str
        The stemmed tokens joined by single spaces (``""`` for empty or
        whitespace-only input).
    """
    if stemmer is None:
        # Build the default stemmer once and keep it on the function object,
        # so stemming a whole corpus does not re-create it per row.
        stemmer = getattr(transform_text, "_default_stemmer", None)
        if stemmer is None:
            stemmer = StempelStemmer.default()
            transform_text._default_stemmer = stemmer

    stems = []
    for word in text.split():
        stem = stemmer.stem(word)
        # stem() may yield None when the stemmer has no result for a token;
        # keep the raw token then, otherwise ' '.join() raises TypeError.
        stems.append(word if stem is None else stem)
    return ' '.join(stems)

In [4]:
# Stem each offence description once. Iterating the column directly keeps
# corpus[k] aligned with the k-th row of `dataset` (what `iloc` addresses)
# without relying on the frame having a default 0..n-1 index.
corpus = [transform_text(crime_text) for crime_text in dataset['crime']]
    

In [10]:
# Persist the stemmed descriptions as a one-column table named 'crime'.
transformed_criminal = pd.DataFrame({'crime': corpus})
transformed_criminal.to_csv('transformed.csv', sep=',')

In [5]:
def fit_crime(my_crime, data, records=None):
    """Return the record whose text is most similar to ``my_crime``.

    A TF-IDF model is fitted on ``data``, the query is projected into the
    same vector space, and the entry with the highest cosine similarity is
    selected.

    Parameters
    ----------
    my_crime : str
        Query text, pre-processed the same way as the entries of ``data``.
    data : iterable of str
        Corpus to match against; entry ``k`` must describe row ``k`` of
        ``records``.
    records : pandas.DataFrame, optional
        Table with 'article_number', 'crime' and 'penalty' columns from which
        the result is read. Defaults to the notebook-level ``dataset`` so
        existing two-argument calls behave as before.

    Returns
    -------
    tuple
        ``(article_number, crime, penalty)`` of the best-matching row.

    Raises
    ------
    ValueError
        If ``data`` and ``records`` do not have the same number of rows.

    Notes
    -----
    If the query shares no vocabulary with the corpus every similarity is 0
    and the selected row is merely whichever index ``argsort`` places last.
    """
    if records is None:
        # Backward-compatible default: the frame the corpus was built from.
        records = dataset

    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(data)
    if tfidf_matrix.shape[0] != len(records):
        # A positional lookup below would otherwise return an unrelated row.
        raise ValueError(
            f"corpus has {tfidf_matrix.shape[0]} entries but records has {len(records)} rows"
        )

    crime_vector = tfidf_vectorizer.transform([my_crime])
    # One row (the single query) by one column per corpus entry.
    similarity = cosine_similarity(crime_vector, tfidf_matrix)
    print(similarity)
    best_match_index = similarity.argsort()[0][-1]

    # Fetch the winning row once instead of once per returned field.
    best_row = records.iloc[best_match_index]
    return best_row['article_number'], best_row['crime'], best_row['penalty']

In [6]:
# Stem the free-text query exactly like the corpus entries, then echo it.
wykroczenie_input = transform_text("zabijałem")
print(wykroczenie_input)

# Look up the closest offence; only the description and penalty are reported.
numer, opis, kara = fit_crime(wykroczenie_input, corpus)

print(f"Dopasowane przestępstwo: {opis}")
print(f"Grożąca kara: {kara}")

zabijać
[[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.71242647 0.17800549 0.12690138 0.27827479 0.0971476  0.
  0.26699086 0.29277182 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.        ]]
Dopasowane przestępstwo: § 1. Kto zabija człowieka
Grożąca kara: od 10 lat do dożywotniego pozbawienia wolności
