In [128]:
%pip install imbalanced-learn

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler






Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [129]:
df = pd.read_csv('social_media_dataset.csv')

In [130]:
df.head()

Unnamed: 0,post_id,timestamp,day_of_week,platform,user_id,location,language,text_content,hashtags,mentions,...,comments_count,impressions,engagement_rate,brand_name,product_name,campaign_name,campaign_phase,user_past_sentiment_avg,user_engagement_growth,buzz_change_rate
0,kcqbs6hxybia,2024-12-09 11:26:15,Monday,Instagram,user_52nwb0a6,"Melbourne, Australia",pt,Just tried the Chromebook from Google. Best pu...,#Food,,...,701,18991,0.19319,Google,Chromebook,BlackFriday,Launch,0.0953,-0.3672,19.1
1,vkmervg4ioos,2024-07-28 19:59:26,Sunday,Twitter,user_ucryct98,"Tokyo, Japan",ru,Just saw an ad for Microsoft Surface Laptop du...,"#MustHave, #Food","@CustomerService, @BrandCEO",...,359,52764,0.05086,Microsoft,Surface Laptop,PowerRelease,Post-Launch,0.1369,-0.451,-42.6
2,memhx4o1x6yu,2024-11-23 14:00:12,Saturday,Reddit,user_7rrev126,"Beijing, China",ru,What's your opinion about Nike's Epic React? ...,"#Promo, #Food, #Trending",,...,643,8887,0.45425,Nike,Epic React,BlackFriday,Post-Launch,0.2855,-0.4112,17.4
3,bhyo6piijqt9,2024-09-16 04:35:25,Monday,YouTube,user_4mxuq0ax,"Lagos, Nigeria",en,Bummed out with my new Diet Pepsi from Pepsi! ...,"#Reviews, #Sustainable","@StyleGuide, @BrandSupport",...,743,6696,0.42293,Pepsi,Diet Pepsi,LaunchWave,Launch,-0.2094,-0.0167,-5.5
4,c9dkiomowakt,2024-09-05 21:03:01,Thursday,Twitter,user_l1vpox2k,"Berlin, Germany",hi,Just tried the Corolla from Toyota. Absolutely...,"#Health, #Travel","@BrandSupport, @InfluencerName",...,703,47315,0.08773,Toyota,Corolla,LocalTouchpoints,Launch,0.6867,0.0807,38.8


In [131]:
df = df.dropna()

In [132]:
rename_dict = {
    'timestamp': 'data_e_hora',
    'day_of_week': 'dia_da_semana',
    'platform': 'plataforma',
    'user_id': 'id_usuario',
    'location': 'localizacao',
    'language': 'idioma',
    'text_content': 'conteudo_de_texto',
    'hashtags': 'hastags',
    'mentions': 'mencoes',
    'comments_count': 'comentarios',
    'likes_count': 'contagem',
    'impressions': 'impressoes',
    'engagement_rate': 'taxa_de_engajamento',
    'brand_name': 'marca_registrada',
    'product_name': 'nome_do_produto',
    'campaign_name': 'nome_da_campanha',
    'campaign_phase': 'fase_da_campnha',
    'user_past_sentiment_avg': 'sentimento_medio_do_usuario_anterior',
    'user_engagement_growth': 'crescimento_do_engajamento_do_usuario',
    'buzz_change_rate': 'taxa_de_mudanca_de_buzz'
}
df = df.rename(columns=rename_dict)


In [133]:
for col in df.select_dtypes(include=['object']).columns:
    lb = LabelEncoder()
    df[col] = lb.fit_transform(df[col])

In [134]:
variaveis_x = df.drop('plataforma', axis=1)
variavel_y = df['plataforma']
x_treino, x_teste, y_treino, y_teste = train_test_split(variaveis_x, variavel_y, test_size=0.3, random_state=10)




In [135]:
selector = SelectKBest(f_classif, k=10)
x_treino = selector.fit_transform(x_treino, y_treino)
x_teste = selector.transform(x_teste)





In [136]:
smote = SMOTE(random_state=10)
x_treino, y_treino = smote.fit_resample(x_treino, y_treino)


In [137]:
scaler = StandardScaler()
x_treino = scaler.fit_transform(x_treino)
x_teste = scaler.transform(x_teste)



In [138]:
modelo = RandomForestClassifier(n_estimators=100, random_state=10)
modelo.fit(x_treino, y_treino)
y_pred = modelo.predict(x_teste)





In [139]:
param_grid = {
    'n_estimators': [100, 500, 1000],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 5, 10]
}

grid_search = GridSearchCV(RandomForestClassifier(random_state=10), param_grid, cv=5)
grid_search.fit(x_treino, y_treino)



In [142]:
from sklearn.ensemble import VotingClassifier

modelo1 = RandomForestClassifier(n_estimators=100, random_state=10)
modelo2 = GradientBoostingClassifier(n_estimators=100, random_state=10)

ensemble = VotingClassifier(estimators=[('rf', modelo1), ('gb', modelo2)])
ensemble.fit(x_treino, y_treino)




In [144]:
from sklearn.preprocessing import PolynomialFeatures

poly_features = PolynomialFeatures(degree=2)
x_treino_poly = poly_features.fit_transform(x_treino)
x_teste_poly = poly_features.transform(x_teste)

modelo = RandomForestClassifier(n_estimators=100, random_state=10)
modelo.fit(x_treino_poly, y_treino)



In [146]:
y_pred = modelo.predict(x_teste_poly)

# Avaliação do modelo
print("Acurácia:", accuracy_score(y_teste, y_pred))
print("Relatório de classificação:")
print(classification_report(y_teste, y_pred))
print("Matriz de confusão:")
print(confusion_matrix(y_teste, y_pred))



Acurácia: 0.2076095947063689
Relatório de classificação:
              precision    recall  f1-score   support

           0       0.20      0.22      0.21       480
           1       0.21      0.23      0.22       477
           2       0.23      0.23      0.23       475
           3       0.19      0.18      0.18       479
           4       0.22      0.19      0.20       507

    accuracy                           0.21      2418
   macro avg       0.21      0.21      0.21      2418
weighted avg       0.21      0.21      0.21      2418

Matriz de confusão:
[[107 104  98  96  75]
 [ 99 108  91  91  88]
 [113  99 107  72  84]
 [114 104  85  85  91]
 [103 109  91 109  95]]
