In [None]:
import pandas as pd

# Load the consumer-complaints CSV into a DataFrame.
# low_memory=False is forwarded to pandas' CSV parser unchanged.
df = pd.read_csv(
    "/content/consumercomplaints.csv",
    low_memory=False,
)

# Preview: the first five rows, followed by the column index.
print(df.head(), df.columns, sep="\n")


   Unnamed: 0 Date received  \
0           0    2022-11-11   
1           1    2022-11-23   
2           2    2022-11-16   
3           3    2022-11-15   
4           4    2022-11-07   

                                             Product  \
0                                           Mortgage   
1  Credit reporting, credit repair services, or o...   
2                                           Mortgage   
3                        Checking or savings account   
4                                           Mortgage   

                  Sub-product                           Issue  \
0  Conventional home mortgage  Trouble during payment process   
1            Credit reporting     Improper use of your report   
2                 VA mortgage  Trouble during payment process   
3            Checking account             Managing an account   
4      Other type of mortgage  Trouble during payment process   

                                       Sub-issue  \
0                                

In [None]:
# Keep only the complaint narrative and its product category, drop every row
# where either of the two is missing, and give both columns short names.
df = (
    df[['Consumer complaint narrative', 'Product']]
    .dropna()
    .rename(columns={
        'Consumer complaint narrative': 'complaint',
        'Product': 'product',
    })
)

# Show the first five remaining rows.
print(df.head())


                                            complaint  \
3   Hi, I have been banking with Wells Fargo for o...   
11  XXXX is attempting to collect funds for Valuat...   
15  Today I called to get my balance and reset my ...   
51  The Federal Trade Commission Bureau of Consume...   
56  We applied for a home loan using agent XXXX XX...   

                                              product  
3                         Checking or savings account  
11                                    Debt collection  
15  Credit reporting, credit repair services, or o...  
51  Credit reporting, credit repair services, or o...  
56                                           Mortgage  


In [None]:
import re
import string

def clean_text(text):
    """Normalize a raw complaint string before vectorization.

    Steps, in order: lowercase; drop URLs (tokens starting with ``http`` or
    ``www``); drop ``@mentions`` and ``#`` signs; drop ASCII punctuation;
    drop every token that contains a digit; collapse whitespace runs to a
    single space and trim both ends.

    Args:
        text: The raw text. Must be a ``str`` (``.lower()`` is called on it).

    Returns:
        The cleaned, lowercase string; may be empty.
    """
    text = text.lower()
    # `http\S+` already covers https links, so no separate alternative is needed.
    text = re.sub(r"http\S+|www\S+", '', text)
    # Remove whole @mentions and bare '#' signs. The previous pattern `\@w+`
    # only matched a literal "@w…", so mention handles were left in the text.
    text = re.sub(r'@\w+|#', '', text)
    # Strip ASCII punctuation.
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)
    # Drop any token containing a digit (plain numbers, "2nd", "x123", ...).
    text = re.sub(r'\w*\d\w*', '', text)
    # Collapse the gaps left behind by the removals above.
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# Clean every complaint narrative and store the result in a new column.
df["clean_complaint"] = df["complaint"].map(clean_text)

# Preview the cleaned text next to its product label.
df.loc[:, ["clean_complaint", "product"]].head()


Unnamed: 0,clean_complaint,product
3,hi i have been banking with wells fargo for ov...,Checking or savings account
11,xxxx is attempting to collect funds for valuat...,Debt collection
15,today i called to get my balance and reset my ...,"Credit reporting, credit repair services, or o..."
51,the federal trade commission bureau of consume...,"Credit reporting, credit repair services, or o..."
56,we applied for a home loan using agent xxxx xx...,Mortgage


In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode each product category as an integer class id.
le = LabelEncoder()
df["label"] = le.fit_transform(df["product"])

# Category name -> encoded id, printed so the numeric labels in later
# reports can be read back as product names (optional but handy).
label_map = {
    category: code
    for category, code in zip(le.classes_, le.transform(le.classes_))
}
print("Etiket eşleşmeleri:\n", label_map)


Etiket eşleşmeleri:
 {'Bank account or service': np.int64(0), 'Checking or savings account': np.int64(1), 'Consumer Loan': np.int64(2), 'Credit card': np.int64(3), 'Credit card or prepaid card': np.int64(4), 'Credit reporting': np.int64(5), 'Credit reporting, credit repair services, or other personal consumer reports': np.int64(6), 'Debt collection': np.int64(7), 'Money transfer, virtual currency, or money service': np.int64(8), 'Money transfers': np.int64(9), 'Mortgage': np.int64(10), 'Other financial service': np.int64(11), 'Payday loan': np.int64(12), 'Payday loan, title loan, or personal loan': np.int64(13), 'Prepaid card': np.int64(14), 'Student loan': np.int64(15), 'Vehicle loan or lease': np.int64(16)}


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

X = df["clean_complaint"]
y = df["label"]

# Split the raw text BEFORE fitting the vectorizer. Fitting TF-IDF on the
# full corpus lets the held-out rows shape the vocabulary and IDF weights,
# which leaks test information into training and inflates the test scores.
# Same test_size / random_state as before, so the row partition is unchanged.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# TF-IDF vectorization: learn vocabulary and IDF from the training rows only,
# then apply that fitted transform to the test rows.
tfidf = TfidfVectorizer(max_features=5000)
X_train = tfidf.fit_transform(X_train_text)
X_test = tfidf.transform(X_test_text)

# Full-corpus matrix under its previous name, for any cell that still reads
# X_vec; it is now produced by the train-fitted vectorizer.
X_vec = tfidf.transform(X)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Fit a logistic-regression classifier on the TF-IDF features, allowing the
# solver up to 1000 iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Test-set performance.
# Some rare classes receive no predictions at all, which makes their precision
# undefined. zero_division=0 reports 0.00 for those rows (the same value shown
# before) without emitting an UndefinedMetricWarning for each metric.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0))


              precision    recall  f1-score   support

           0       0.55      0.05      0.09       129
           1       0.74      0.81      0.78      2567
           2       0.67      0.03      0.06        59
           3       0.80      0.06      0.12       124
           4       0.76      0.78      0.77      3699
           5       0.71      0.03      0.05       194
           6       0.91      0.95      0.93     27563
           7       0.79      0.73      0.76      6315
           8       0.79      0.67      0.73      1207
           9       0.00      0.00      0.00         9
          10       0.89      0.90      0.89      3014
          11       0.00      0.00      0.00         4
          12       0.00      0.00      0.00         8
          13       0.63      0.34      0.44       549
          14       0.00      0.00      0.00        15
          15       0.85      0.75      0.80       870
          16       0.67      0.57      0.62       819

    accuracy              

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
import pickle

# Persist the classifier, the TF-IDF vectorizer and the label encoder together
# as a single pickled tuple, in that order.
with open("complaint_model.pkl", "wb") as bundle_file:
    pickle.Pickler(bundle_file).dump((model, tfidf, le))


In [None]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression



# Save the fitted TF-IDF vectorizer on its own.
# The vectorizer in this notebook is bound to `tfidf`; the name `vectorizer`
# is never defined, so the previous dump raised NameError on a fresh run.
with open("vectorizer.pkl", "wb") as f:
    pickle.dump(tfidf, f)

