In [None]:
from datetime import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Helper for parsing the date field
def parser(x):
    """Parse a '<year digit>-<month>' string into a datetime.

    The string '190' is prepended to ``x`` before parsing with the
    '%Y-%m' format, so '1-01' is read as '1901-01'.

    Parameters:
    x (str): Date text whose leading part completes the year after '190'.

    Returns:
    datetime.datetime: The parsed date (day defaults to 1).

    Raises:
    ValueError: If '190' + x does not match the '%Y-%m' format.
    """
    # pd.datetime was only an alias for datetime.datetime; it was deprecated
    # in pandas 1.0 and removed in 2.0, so call the standard library directly.
    return datetime.strptime('190' + x, '%Y-%m')

# Read the 'LookerStudio' sheet of the workbook into `df`,
# then preview the last ten rows.
df = pd.read_excel(io='Variavel_out.xlsx', sheet_name='LookerStudio')
print(df.tail(n=10))

In [None]:
def drop_columns(df, columns_to_drop=('espessura', 'Index')):
    """
    Return a new DataFrame without the given columns.

    The input DataFrame is left unmodified and nothing is printed.

    Parameters:
    df (pd.DataFrame): The input DataFrame.
    columns_to_drop (str or iterable of str): Column label(s) to remove.
        Defaults to ('espessura', 'Index').

    Returns:
    pd.DataFrame: A new DataFrame without the specified columns.

    Raises:
    KeyError: If any requested column is not present in ``df``.
    """
    # A bare string would otherwise be split into single characters by list().
    if isinstance(columns_to_drop, str):
        columns_to_drop = [columns_to_drop]
    return df.drop(columns=list(columns_to_drop))

In [None]:
# Assumes `df` was loaded by an earlier cell and has the numeric columns
# 'comprimento_Total', 'altura' and 'peso'.
# Define features and target
X = df[['comprimento_Total', 'altura']]  # Adjust features as needed

# The two classes are defined by splitting 'peso' at its median, so the median
# is the threshold on 'peso'. The SVM below only sees the two features and
# therefore cannot produce a value expressed in 'peso' units.
peso_threshold = df['peso'].median()
y = (df['peso'] > peso_threshold).astype(int)

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train SVM classifier
svm = SVC(kernel='linear', C=1.0)
svm.fit(X_scaled, y)

# Print accuracy and classification report.
# NOTE: these are measured on the training data itself, not on held-out data.
y_pred = svm.predict(X_scaled)
print("Accuracy:", accuracy_score(y, y_pred))
print("\nClassification Report:\n", classification_report(y, y_pred))

# Linear decision boundary in *standardized* feature space:
#   w[0] * x_scaled + w[1] * y_scaled + b = 0
w = svm.coef_[0]
b = svm.intercept_[0]

print(f"Suggested threshold for 'peso': {peso_threshold}")

# Add cluster column based on the threshold
df['cluster'] = (df['peso'] > peso_threshold).astype(int)

# Visualize the clusters and SVM decision boundary
plt.figure(figsize=(10, 6))
plt.scatter(df['comprimento_Total'], df['altura'], c=df['cluster'], cmap='coolwarm', edgecolor='k', s=40)
plt.title("Data Distribution with SVM Decision Boundary for 'peso'")
plt.xlabel('Comprimento Total')
plt.ylabel('Altura')

# Plot decision boundary in the original units of both axes.
# The scaler was fitted on two columns, so it cannot transform a single column;
# standardize/un-standardize each axis by hand from its fitted mean and scale.
x_mean, y_mean = scaler.mean_
x_std, y_std = scaler.scale_
boundary_label = f'Threshold at peso: {peso_threshold:.2f}'
if np.isclose(w[1], 0.0):
    # Boundary does not depend on 'altura': it is a vertical line
    # (w[0] is non-zero here whenever the fitted boundary is non-degenerate).
    x_boundary = (-b / w[0]) * x_std + x_mean
    plt.axvline(x_boundary, color='black', linestyle='--', label=boundary_label)
else:
    x_vals = np.linspace(X['comprimento_Total'].min(), X['comprimento_Total'].max(), 100)
    x_vals_scaled = (x_vals - x_mean) / x_std
    y_vals_scaled = -(w[0] * x_vals_scaled + b) / w[1]
    y_vals = y_vals_scaled * y_std + y_mean
    plt.plot(x_vals, y_vals, color='black', linestyle='--', label=boundary_label)

plt.legend()
plt.show()