In [93]:
import pandas as pd

In [94]:
# Load the training data from the working directory.
df = pd.read_csv("cleaned_train.csv")

In [95]:
# Print the column names, dtypes and non-null counts of the training frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102115 entries, 0 to 102114
Data columns (total 3 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   valak_presence_level   102115 non-null  float64
 1   warren_sanctity_index  102115 non-null  float64
 2   possession_state       102115 non-null  object 
dtypes: float64(2), object(1)
memory usage: 2.3+ MB


In [96]:
# Replace the accented spelling 'Humán' with 'Human' in the target column.
df['possession_state'] = df['possession_state'].replace({'Humán': 'Human'})

In [97]:
from sklearn.preprocessing import LabelEncoder

# Encode the target labels as integer codes. `le` stays in scope so the
# fitted encoder can be reused later.
# NOTE(review): the column is overwritten, so re-running this cell would refit
# `le` on the already-encoded integers.
le = LabelEncoder()
encoded_state = le.fit_transform(df['possession_state'])
df['possession_state'] = encoded_state

In [98]:
# Target: the encoded possession_state column.
y = df['possession_state']
# Features: every remaining column of the training frame.
X = df.drop(columns='possession_state')

In [99]:
from sklearn.model_selection import train_test_split 

# Hold out 20% of the rows for evaluation. random_state pins the shuffle so
# the split (and every score computed from it) is the same on each run.
# NOTE(review): consider stratify=y if the classes are imbalanced — confirm
# every class has at least two rows first.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [100]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, f1_score, recall_score, roc_auc_score
from sklearn.linear_model import LogisticRegression

In [101]:
# Baseline: logistic regression with library defaults, fitted on the
# training split.
logreg_basic = LogisticRegression()
logreg_basic.fit(X=X_train, y=y_train)

In [86]:
# Randomized hyper-parameter search for the logistic-regression model.
# RandomizedSearchCV evaluates n_iter sampled candidates; it is not an
# exhaustive grid search.

# Continuous distribution used to sample l1_ratio for the elastic-net penalty
from scipy.stats import uniform

# Iteration budgets tried for every solver family.
max_iter_options = [10, 50, 100, 200, 500, 1000]

# One dict per family of compatible solver/penalty settings. When given a
# list of dicts, the search first picks one dict uniformly at random and then
# samples each parameter from that dict.
params = [
    # 'liblinear': l1 or l2 penalty; 'liblinear' only supports 'ovr'
    {
        'penalty': ['l1', 'l2'],
        'solver': ['liblinear'],
        'max_iter': max_iter_options,
        'multi_class': ['ovr'],
    },

    # 'lbfgs', 'newton-cg', 'newton-cholesky', 'sag': l2 or no penalty
    {
        'penalty': ['l2', None],
        'solver': ['lbfgs', 'newton-cg', 'newton-cholesky', 'sag'],
        'max_iter': max_iter_options,
        'multi_class': ['auto', 'ovr', 'multinomial'],
    },

    # 'saga' without elastic net. l1_ratio is deliberately left out here:
    # it is only used when penalty='elasticnet', and sampling it for other
    # penalties produces warnings and a meaningless entry in best_params_.
    {
        'penalty': ['l1', 'l2', None],
        'solver': ['saga'],
        'max_iter': max_iter_options,
        'multi_class': ['auto', 'ovr', 'multinomial'],
    },

    # 'saga' with elastic net: the only case where l1_ratio applies
    {
        'penalty': ['elasticnet'],
        'solver': ['saga'],
        'max_iter': max_iter_options,
        'multi_class': ['auto', 'ovr', 'multinomial'],
        'l1_ratio': uniform(0, 1),
    },
]

from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold

# 5-fold stratified cross-validation on the training split.
cv = StratifiedKFold(n_splits=5)

# The list of dicts goes in through 'param_distributions', which is the
# keyword RandomizedSearchCV expects. random_state makes the sampled
# candidates (and therefore the reported best parameters) reproducible.
grid = RandomizedSearchCV(
    estimator=logreg_basic,
    param_distributions=params,
    cv=cv,
    n_jobs=-1,
    scoring='accuracy',
    n_iter=20,
    random_state=42,
)

grid.fit(X_train, y_train)

print("Best parameters found:")
print(grid.best_params_)



Best parameters found:
{'max_iter': 1000, 'multi_class': 'auto', 'penalty': 'l2', 'solver': 'newton-cholesky'}




In [87]:
# Cross-validated accuracy of the best candidate found by the search.
# NOTE(review): a perfect score is worth a sanity check for leakage or a
# trivially separable target — confirm.
grid.best_score_

1.0

In [88]:
# The search was created with the default refit=True, so best_estimator_ has
# already been refitted on all of X_train with the best parameters. Fitting it
# again only repeats that work, so the model is taken as-is.
logreg_bestmodel = grid.best_estimator_
logreg_bestmodel



In [102]:
# Load the unlabeled test set from the working directory.
df2 = pd.read_csv("test.csv")

In [103]:
# Preview the raw test frame; it carries an extra 'Unnamed: 0' column.
df2.head()

Unnamed: 0.1,Unnamed: 0,warren_sanctity_index,valak_presence_level
0,0,1.869665,1.246625
1,1,-33.695463,-8.878922
2,2,-13.834169,-4.969592
3,3,-46.137362,-7.471159
4,4,7.819434,0.747186


In [104]:
# Remove the leftover 'Unnamed: 0' column from the test frame.
# Rebinding instead of inplace=True, together with errors='ignore', keeps this
# cell safe to re-run: a second execution is a no-op rather than a KeyError.
df2 = df2.drop(columns=['Unnamed: 0'], errors='ignore')

In [105]:
# Preview the test frame after removing 'Unnamed: 0'.
df2.head()

Unnamed: 0,warren_sanctity_index,valak_presence_level
0,1.869665,1.246625
1,-33.695463,-8.878922
2,-13.834169,-4.969592
3,-46.137362,-7.471159
4,7.819434,0.747186


In [106]:
# Preview the training frame (presumably to compare its column order with df2).
df.head()

Unnamed: 0,valak_presence_level,warren_sanctity_index,possession_state
0,166.519639,828.598197,0
1,-559.77591,-2802.879549,0
2,-700.7673,-3507.836501,0
3,251.291646,1252.458231,0
4,249.255928,1242.27964,0


In [107]:
# Arrange the test columns as valak_presence_level first, then
# warren_sanctity_index, which is the column order of the training frame.
new_column_order = ['valak_presence_level', 'warren_sanctity_index']

# Select the columns in that order; any other column is left behind.
df2 = df2.loc[:, new_column_order]

In [108]:
# Preview the test frame after reordering its columns.
df2.head()

Unnamed: 0,valak_presence_level,warren_sanctity_index
0,1.246625,1.869665
1,-8.878922,-33.695463
2,-4.969592,-13.834169
3,-7.471159,-46.137362
4,0.747186,7.819434


In [109]:
# Predict the encoded possession_state for the test rows.
# Only the columns the model was fitted on are passed, in fitting order, so
# this cell keeps working after extra columns (such as the prediction column)
# are added to df2, and it does not depend on df2's current column order.
# NOTE(review): this uses the baseline logreg_basic rather than the tuned
# logreg_bestmodel — confirm that is intended.
feature_cols = list(logreg_basic.feature_names_in_)
y_pred = logreg_basic.predict(df2[feature_cols])

In [110]:
# Display the predicted class codes for the test rows.
y_pred

array([0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1,
       1, 0, 0, 0, 0, 1])

In [111]:
# Display the predicted class codes again (repeats the preceding cell).
y_pred

array([0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1,
       1, 0, 0, 0, 0, 1])

In [91]:
# Attach the predictions to the test frame as a 'possession' column.
# NOTE(review): these are the integer codes produced by the label encoder, not
# the original string labels — confirm which form the consumer expects.
df2 = df2.assign(possession=y_pred)

In [92]:
# Write the test frame with its predictions to disk. The row index is written
# as the first column (index=True is the default, spelled out here).
df2.to_csv('cnn.csv', index=True)