<a href="https://colab.research.google.com/github/greeangarden/model_mobil/blob/main/netfilix2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
uploaded = files.upload()


Saving wiki_movie_plots_deduped.csv to wiki_movie_plots_deduped.csv


In [None]:
import pandas as pd

# CSV’yi yükle (gerekirse encoding ver)
df = pd.read_csv("wiki_movie_plots_deduped.csv")

# İsimleri standartla
df = df.rename(columns={"Plot": "description", "Genre": "genre"})

# Boşları at
df = df.dropna(subset=["description", "genre"])

# Çoklu tür varsa ilkini al (örn. "Drama, Romance" -> "Drama")
df["genre"] = df["genre"].astype(str).str.split(",").str[0].str.strip()

# En sık 6 türü otomatik seç (set sabitlemek istemezsen)
top_genres = df["genre"].value_counts().head(6).index.tolist()

# İstersen elle de koyabilirsin, örn:
# top_genres = ["Drama","Comedy","Action","Romance","Horror","Thriller"]

# Diğerlerini Other yap
df["genre"] = df["genre"].where(df["genre"].isin(top_genres), "Other")

print(df["genre"].value_counts())
df.head()


genre
Other      13151
drama       6669
unknown     6083
comedy      5042
action      1568
horror      1281
romance     1092
Name: count, dtype: int64


Unnamed: 0,Release Year,Title,Origin/Ethnicity,Director,Cast,genre,Wiki Page,description
0,1901,Kansas Saloon Smashers,American,Unknown,,unknown,https://en.wikipedia.org/wiki/Kansas_Saloon_Sm...,"A bartender is working at a saloon, serving dr..."
1,1901,Love by the Light of the Moon,American,Unknown,,unknown,https://en.wikipedia.org/wiki/Love_by_the_Ligh...,"The moon, painted with a smiling face hangs ov..."
2,1901,The Martyred Presidents,American,Unknown,,unknown,https://en.wikipedia.org/wiki/The_Martyred_Pre...,"The film, just over a minute long, is composed..."
3,1901,"Terrible Teddy, the Grizzly King",American,Unknown,,unknown,"https://en.wikipedia.org/wiki/Terrible_Teddy,_...",Lasting just 61 seconds and consisting of two ...
4,1902,Jack and the Beanstalk,American,"George S. Fleming, Edwin S. Porter",,unknown,https://en.wikipedia.org/wiki/Jack_and_the_Bea...,The earliest known adaptation of the classic f...


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

X = df["description"]
y = df["genre"]

vectorizer = TfidfVectorizer(stop_words="english", max_features=7000, ngram_range=(1,2), max_df=0.85, min_df=2)
X_tfidf = vectorizer.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

model = LogisticRegression(max_iter=1500)  # istersen class_weight="balanced" deneyebilirsin
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.5063055316709659
              precision    recall  f1-score   support

       Other       0.51      0.73      0.60      2629
      action       0.47      0.10      0.16       310
      comedy       0.51      0.39      0.44       981
       drama       0.46      0.37      0.41      1339
      horror       0.69      0.30      0.42       236
     romance       0.41      0.10      0.16       238
     unknown       0.53      0.49      0.51      1245

    accuracy                           0.51      6978
   macro avg       0.51      0.35      0.39      6978
weighted avg       0.50      0.51      0.48      6978



In [None]:
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score

# Modeli oluştur
svc_model = LinearSVC(max_iter=5000)  # iterasyon sayısını biraz yüksek tuttuk
svc_model.fit(X_train, y_train)

# Tahmin
y_pred_svc = svc_model.predict(X_test)

# Sonuçlar
print("Doğruluk (Accuracy):", accuracy_score(y_test, y_pred_svc))
print("\nSınıflandırma Raporu:\n", classification_report(y_test, y_pred_svc))


Doğruluk (Accuracy): 0.47836056176554886

Sınıflandırma Raporu:
               precision    recall  f1-score   support

       Other       0.52      0.62      0.57      2629
      action       0.29      0.16      0.21       310
      comedy       0.44      0.44      0.44       981
       drama       0.41      0.37      0.39      1339
      horror       0.58      0.45      0.51       236
     romance       0.30      0.11      0.16       238
     unknown       0.49      0.48      0.49      1245

    accuracy                           0.48      6978
   macro avg       0.43      0.38      0.39      6978
weighted avg       0.47      0.48      0.47      6978



In [None]:
def predict_genre(text):
    text_tfidf = vectorizer.transform([text])
    prediction = svc_model.predict(text_tfidf)
    return prediction[0]

# Örnek açıklama denemesi
sample = "A retired hitman returns to the underworld to seek vengeance after the death of his beloved dog."
print("Tahmin:", predict_genre(sample))
sample = "Three friends get into ridiculous situations as they plan the ultimate prank on their neighbor."
print("Tahmin:", predict_genre(sample))
sample = "A well-known comedian takes the stage to deliver sharp observations about everyday life, politics, and relationships."
print("Tahmin:", predict_genre(sample))
sample = "A powerful look into the life of a refugee family as they struggle to find a new home and rebuild their lives."
print("Tahmin:", predict_genre(sample))
sample = "A young girl goes on a magical journey with her talking pet to save her village from an evil sorcerer."
print("Tahmin:", predict_genre(sample))
sample = "In this spine-chilling tale, a family moves into a new house only to discover a terrifying presence lurking in the shadows."
print("Tahmin:", predict_genre(sample))
sample = "A gifted violinist must choose between love and his lifelong dream of performing on the world’s biggest stage."
print("Tahmin:", predict_genre(sample))
sample = "When two strangers meet on a delayed flight, sparks fly and a new chapter begins in their lives."
print("Tahmin:", predict_genre(sample))


Tahmin: action
Tahmin: comedy
Tahmin: drama
Tahmin: drama
Tahmin: Other
Tahmin: Other
Tahmin: comedy
Tahmin: unknown
