In [1]:
import pandas as pd
import numpy as np

## item a) Leitura do Dataset: 

In [2]:
arquivo = pd.read_parquet('fakeTelegram3.parquet') # arquivo tratado
print(arquivo.shape)
arquivo.head() 

(339286, 23)


Unnamed: 0,date_message,id_member_anonymous,id_group_anonymous,media,media_type,media_url,has_media,has_media_url,trava_zap,text_content_anonymous,...,score_misinformation,id_message,message_type,messenger,media_name,media_md5,words,domain,viral,sharings
0,2022-10-05 06:25:04,1078cc958f0febe28f4d03207660715f,12283e08a2eb5789201e105b34489ee7,,,,False,False,False,Então é Fato Renato o áudio que eu ouvi no wha...,...,,16385,Texto,telegram,,,20,,0,1
1,2022-10-05 06:25:08,,12283e08a2eb5789201e105b34489ee7,,,,False,False,False,"Saiu no YouTube do presidente a 8 horas atrás,...",...,,16386,Texto,telegram,,,23,,0,1
2,2022-10-05 06:26:28,92a2d8fd7144074f659d1d29dc3751da,9f2d7394334eb224c061c9740b5748fc,,,,False,False,False,"É isso, nossa parte já foi quase toda feita. N...",...,0.157242,16366,Texto,telegram,,,59,,0,1
3,2022-10-05 06:27:28,d60aa38f62b4977426b70944af4aff72,c8f2de56550ed0bf85249608b7ead93d,94dca4cda503100ebfda7ce2bcc060eb.jpg,image/jpg,,True,False,False,GENTE ACHEI ELES EM UMA SEITA MAÇONÁRICA,...,,19281,Imagem,telegram,,94dca4cda503100ebfda7ce2bcc060eb,7,,0,1
5,2022-10-05 06:28:30,,b52442a5fbc459ae590dca0d215e32f9,,,,False,False,False,Kķkkkkk to rindo até agora....Quem disse q ia ...,...,0.197813,2735,Texto,telegram,,,25,,0,1


## item b) Removendo trava-zaps, linhas repetidas e textos com menos de 5 palavras: 

In [3]:
#removendo trava-zap
arquivo = arquivo[arquivo['trava_zap'] != 1]

#removendo linhas duplicadas
arquivo = arquivo[arquivo.duplicated() == False]
arquivo = arquivo.drop_duplicates(subset=['text_content_anonymous', 'sharings'])


#texto com menos de 5 palavra
arquivo = arquivo[arquivo['text_content_anonymous'].str.split().str.len()>=5]

arquivo.shape

(222364, 23)

## item c) Agrupando linhas com mensagens semelhantes ou iguais:

#### Pré-processamento do texto:

In [None]:
import nltk
nltk.download('stopwords')

In [4]:
import re
import pandas as pd
from unidecode import unidecode


from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

stop_words = set(stopwords.words('portuguese')) 
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    # Lowercase
    text = text.lower()

    # Remover URLs
    text = re.sub(r"http\S+|www.\S+", "", text)

    # Remover números
    text = re.sub(r"\d+", "", text)

    # Remover pontuação
    text = re.sub(r"[^\w\s]", "", text)

    # Tokenizar
    tokens = word_tokenize(text)

    # Remover stopwords e aplicar lematização
    clean_tokens = [
        lemmatizer.lemmatize(word)
        for word in tokens if word not in stop_words and len(word) > 2
    ]

    # Reconstruir texto limpo
    return " ".join(clean_tokens)


arquivo['text_preprocessed'] = arquivo['text_content_anonymous'].apply(preprocess_text)


  from scipy.stats import fisher_exact


In [6]:
arquivo['text_preprocessed']

0         então fato renato áudio ouvi whatsapp ocorreu ...
1         saiu youtube presidente horas atrás infelizmen...
2         parte quase toda feita segundo turno completam...
3                              gente achei seita maçonárica
5         kķkkkkk rindo agoraquem disse fazer acordo dia...
                                ...                        
557539    compartilhando arquivo orientações solicitar a...
557542              carta forças armada sobre manifestações
557543    recuar compromisso desistir nunca opção deus a...
557545                                   visitem canal user
557554    sobre reunião generais bolsonaro dia dia após ...
Name: text_preprocessed, Length: 222364, dtype: object

In [None]:
! pip install datasketch

In [7]:
from datasketch import MinHash, MinHashLSH
import re

def get_shingles(text, n=5):
    text = re.sub(r'\W+', ' ', text.lower())
    tokens = text.split()
    return set([' '.join(tokens[i:i+n]) for i in range(len(tokens)-n+1)])

def create_minhash(shingles, num_perm=128):
    m = MinHash(num_perm=num_perm)
    for shingle in shingles:
        m.update(shingle.encode('utf8'))
    return m

In [17]:
def cluster_texts_with_lsh(df, text_column='text_content_anonymous', threshold=0.8, ngram=5):
    lsh = MinHashLSH(threshold=threshold, num_perm=128)
    minhashes = {}
    
    for idx, text in enumerate(df[text_column]):
        shingles = get_shingles(text, n=ngram)
        m = create_minhash(shingles)
        lsh.insert(idx, m)
        minhashes[idx] = m

    index_assigned = set()
    clusters = []

    for idx in range(len(df)):
        if idx in index_assigned:
            continue
        similar = lsh.query(minhashes[idx])
        unique_similar = [i for i in similar if i not in index_assigned]
        if unique_similar:
            index_assigned.update(unique_similar)
            clusters.append(set(unique_similar))

    return clusters

clusters = cluster_texts_with_lsh(arquivo, text_column='text_preprocessed', threshold=0.8, ngram=5)

grouped_data = []
arquivo['date_message'] = pd.to_datetime(arquivo['date_message'])

for cluster in clusters:
    cluster_indices = list(cluster)
    cluster_df = arquivo.iloc[cluster_indices]

    
    earliest_idx = cluster_df['date_message'].idxmin()
    
    grouped_data.append({
        'text_content_anonymous': arquivo.loc[earliest_idx, 'text_content_anonymous'],
        'text_preprocessed': arquivo.loc[earliest_idx, 'text_preprocessed'],
        'sharings': cluster_df['sharings'].sum(),
        'date_message': arquivo.loc[earliest_idx, 'date_message'],
        'id_group_anonymous': arquivo.loc[earliest_idx, 'id_group_anonymous'],
        'id_member_anonymous': arquivo.loc[earliest_idx, 'id_member_anonymous'],
        'media': arquivo.loc[earliest_idx, 'media'],
        'media_type': arquivo.loc[earliest_idx, 'media_type'],
        'media_url': arquivo.loc[earliest_idx, 'media_url'],
        'score_sentiment': cluster_df['score_sentiment'].mean(),
        'score_misinformation': cluster_df['score_misinformation'].mean()
    })

arquivo_agrupado = pd.DataFrame(grouped_data)


## item d) Criando novos atributos numéricos: 

In [23]:
arquivo_agrupado['caracteres'] = arquivo_agrupado['text_content_anonymous'].str.len()
arquivo_agrupado['words'] = arquivo_agrupado['text_content_anonymous'].str.split().str.len()


In [24]:
arquivo_agrupado.to_parquet('fakeTelegramAgrupado.parquet')

## item e)

#### Selecionando features

In [2]:
# Selecionando colunas numéricas e codificando media_type 
arquivo = pd.read_parquet('fakeTelegramAgrupado.parquet')
features = [
    'score_sentiment',
    'score_misinformation',
    'media_type',
    'caracteres',
    'words',
    'sharings']
arquivo_encoded = pd.get_dummies(arquivo[features], drop_first=True)
arquivo_encoded.head()

Unnamed: 0,score_sentiment,score_misinformation,caracteres,words,sharings,media_type_application/json,media_type_application/octet-stream,media_type_application/pdf,media_type_application/vnd.android.package-archive,media_type_application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,...,media_type_image/pdf,media_type_image/webm,media_type_image/wmv,media_type_text/html,media_type_url,media_type_video/3gpp,media_type_video/mp4,media_type_video/mpeg,media_type_video/webm,media_type_video/x-matroska
0,0.0,,110,20,1,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,0.0644,,141,23,1,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,-0.3551,0.157242,350,59,1,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,-0.033869,0.069624,67,7,20127,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
4,0.7003,0.197813,133,25,1,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [None]:
! pip install gensim

In [3]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
import nltk

In [None]:
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

#### Bag of Words(BOW)

In [6]:
bow_vectorizer = CountVectorizer(max_features=500)  # ou outro limite
X_bow = bow_vectorizer.fit_transform(arquivo['text_preprocessed']).toarray()
arquivo_bow = pd.DataFrame(X_bow, columns=[f'bow_{i}' for i in range(X_bow.shape[1])])
arquivo_bow = pd.concat([arquivo_bow, arquivo_encoded], axis=1)
arquivo_bow.head()


Unnamed: 0,bow_0,bow_1,bow_2,bow_3,bow_4,bow_5,bow_6,bow_7,bow_8,bow_9,...,media_type_image/pdf,media_type_image/webm,media_type_image/wmv,media_type_text/html,media_type_url,media_type_video/3gpp,media_type_video/mp4,media_type_video/mpeg,media_type_video/webm,media_type_video/x-matroska
0,0,0,0,0,0,0,0,0,0,0,...,False,False,False,False,False,False,False,False,False,False
1,0,0,0,0,0,0,0,0,0,0,...,False,False,False,False,False,False,False,False,False,False
2,0,0,0,0,0,0,0,0,0,0,...,False,False,False,False,False,False,False,False,False,False
3,0,0,0,0,0,0,0,0,0,0,...,False,False,False,False,True,False,False,False,False,False
4,0,0,0,0,0,0,0,0,0,0,...,False,False,False,False,False,False,False,False,False,False


In [7]:
arquivo_bow.to_parquet('arquivo_bow.parquet')

#### TF-IDF

In [8]:
tfidf = TfidfVectorizer(max_features=300)
X_tfidf = tfidf.fit_transform(arquivo['text_preprocessed']).toarray()
arquivo_tfidf = pd.DataFrame(X_tfidf, columns=[f'tfidf_{i}' for i in range(X_tfidf.shape[1])])
arquivo_tfidf = pd.concat([arquivo_tfidf, arquivo_encoded], axis=1)
arquivo_tfidf.head()

Unnamed: 0,tfidf_0,tfidf_1,tfidf_2,tfidf_3,tfidf_4,tfidf_5,tfidf_6,tfidf_7,tfidf_8,tfidf_9,...,media_type_image/pdf,media_type_image/webm,media_type_image/wmv,media_type_text/html,media_type_url,media_type_video/3gpp,media_type_video/mp4,media_type_video/mpeg,media_type_video/webm,media_type_video/x-matroska
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.251737,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,True,False,False,False,False,False
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False


In [9]:
arquivo_tfidf.to_parquet('arquivo_tfidf.parquet')

#### Word2Vec

In [10]:
# Tokenizar os textos
texts_tokenized = arquivo['text_preprocessed'].apply(word_tokenize)

# Treinar o modelo Word2Vec (ou use um pré-treinado)
model_w2v = Word2Vec(sentences=texts_tokenized, vector_size=100, window=5, min_count=1, workers=4)

# Função para vetorizar cada texto pela média dos vetores das palavras
def vectorize(text):
    words = [word for word in word_tokenize(text) if word in model_w2v.wv]
    if words:
        return np.mean(model_w2v.wv[words], axis=0)
    else:
        return np.zeros(model_w2v.vector_size)

X_w2v = np.vstack(arquivo['text_preprocessed'].apply(vectorize))

# DataFrame para visualização
arquivo_w2v = pd.DataFrame(X_w2v, columns=[f'w2v_{i}' for i in range(X_w2v.shape[1])])


In [11]:
arquivo_w2v = pd.concat([arquivo_w2v, arquivo_encoded], axis=1)
arquivo_w2v.head()

Unnamed: 0,w2v_0,w2v_1,w2v_2,w2v_3,w2v_4,w2v_5,w2v_6,w2v_7,w2v_8,w2v_9,...,media_type_image/pdf,media_type_image/webm,media_type_image/wmv,media_type_text/html,media_type_url,media_type_video/3gpp,media_type_video/mp4,media_type_video/mpeg,media_type_video/webm,media_type_video/x-matroska
0,0.299627,0.872473,1.404046,-0.495331,0.702305,-0.520024,-0.152611,0.550858,-0.169715,-0.782962,...,False,False,False,False,False,False,False,False,False,False
1,0.442348,0.507961,1.20478,-0.864692,0.646155,-0.969777,-0.599474,0.701066,-0.305362,-0.363944,...,False,False,False,False,False,False,False,False,False,False
2,0.150744,0.891145,0.641268,-0.29036,0.558042,-1.141146,-0.032849,0.74937,0.263763,-0.502847,...,False,False,False,False,False,False,False,False,False,False
3,1.729607,0.387148,1.296565,0.069322,0.544315,-0.384147,-0.726469,0.88253,-0.32294,-0.534237,...,False,False,False,False,True,False,False,False,False,False
4,0.178633,0.390625,1.192528,-0.664075,-0.06408,-0.894211,-0.009225,1.024703,-0.621092,-0.039709,...,False,False,False,False,False,False,False,False,False,False


In [12]:
arquivo_w2v.to_parquet('arquivo_w2v.parquet')

#### Split treino teste

In [32]:
arquivo_bow = pd.read_parquet('arquivo_bow.parquet')
arquivo_tfidf = pd.read_parquet('arquivo_tfidf.parquet')
arquivo_w2v = pd.read_parquet('arquivo_w2v.parquet')

In [16]:
arquivo_bow['score_misinformation'] = arquivo_bow['score_misinformation'].fillna(arquivo_bow['score_misinformation'].mean())
arquivo_tfidf['score_misinformation'] = arquivo_tfidf['score_misinformation'].fillna(arquivo_tfidf['score_misinformation'].mean())
arquivo_w2v['score_misinformation'] = arquivo_w2v['score_misinformation'].fillna(arquivo_w2v['score_misinformation'].mean())

In [18]:
arquivo_bow['score_sentiment'] = arquivo_bow['score_sentiment'].fillna(arquivo_bow['score_sentiment'].mean())
arquivo_tfidf['score_sentiment'] = arquivo_tfidf['score_sentiment'].fillna(arquivo_tfidf['score_sentiment'].mean())
arquivo_w2v['score_sentiment'] = arquivo_w2v['score_sentiment'].fillna(arquivo_w2v['score_sentiment'].mean())

In [19]:
arquivo_bow_X = arquivo_bow.drop(columns=['sharings'])
arquivo_tfidf_X = arquivo_tfidf.drop(columns=['sharings'])
arquivo_w2v_X = arquivo_w2v.drop(columns=['sharings'])
arquivo_y = arquivo['sharings']

In [20]:
from sklearn.model_selection import train_test_split, KFold
X_train_bow, X_test_bow, y_train_bow, y_test_bow = train_test_split(arquivo_bow_X, arquivo_y, test_size=0.2, random_state=42)
X_train_tfidf, X_test_tfidf, y_train_tfidf, y_test_tfidf = train_test_split(arquivo_tfidf_X, arquivo_y, test_size=0.2, random_state=42)
X_train_w2v, X_test_w2v, y_train_w2v, y_test_w2v = train_test_split(arquivo_w2v_X, arquivo_y, test_size=0.2, random_state=42)

#### Modelos

In [39]:
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet


def avaliar_modelos_com_kfold(X, y, nome_embedding):
    modelos = {
        "Linear": LinearRegression(),
        "Ridge": Ridge(),
        "Lasso": Lasso(),
        "ElasticNet": ElasticNet()
    }

    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    resultados = {}

    print(f"\n====== Resultados para {nome_embedding} com K-Fold (5 folds) ======\n")

    for nome, modelo in modelos.items():
        rmse_scores = -cross_val_score(modelo, X, y, scoring='neg_root_mean_squared_error', cv=kf)
        r2_scores = cross_val_score(modelo, X, y, scoring='r2', cv=kf)

        print(f"{nome} - Regressão:")
        print("R²:", r2_scores)
        print("RMSE:", rmse_scores)
        print()

        resultados[nome] = {
            'r2': r2_scores.tolist(),
            'rmse': rmse_scores.tolist()
        }

    return resultados



#### Regressão Linear

In [40]:
result_linear = avaliar_modelos_com_kfold(arquivo_bow_X, arquivo_y, "BoW")



Linear - Regressão:
R²: [ 3.75652371e-01 -1.83186713e-01 -1.70137938e-04  2.96807576e-03
 -6.62456558e-02]
RMSE: [  3.61749614   3.93873039 102.4563627   89.215649     6.09346249]

Ridge - Regressão:
R²: [ 3.52603710e-01 -1.84942401e-01 -1.73198729e-04  2.01837409e-03
 -6.72882522e-02]
RMSE: [  3.68366346   3.94165158 102.45651947  89.25812913   6.09644092]

Lasso - Regressão:
R²: [-5.00529232e-05 -1.92836777e-03 -1.51156011e-05 -1.85453815e-05
  1.53836051e-03]
RMSE: [  4.57831544   3.6244987  102.44842223  89.34917236   5.8965936 ]

ElasticNet - Regressão:
R²: [ 2.16155403e-04 -1.56086586e-03 -1.26907764e-05 -1.85629859e-05
  1.69829591e-03]
RMSE: [  4.57770604   3.62383392 102.44829802  89.34917315   5.89612132]



In [41]:
result_linear = avaliar_modelos_com_kfold(arquivo_tfidf_X, arquivo_y, "TF-IDF")



Linear - Regressão:
R²: [ 1.24282159e-01 -5.32549714e-01 -3.43757084e-04  2.46069102e-03
 -1.89761831e-01]
RMSE: [  4.28427108   4.48267133 102.46525499  89.23834682   6.43673339]

Ridge - Regressão:
R²: [ 1.06676306e-01 -5.34845590e-01 -3.48237142e-04  1.53316822e-03
 -1.90471161e-01]
RMSE: [  4.32712329   4.48602777 102.46548444  89.27982457   6.43865188]

Lasso - Regressão:
R²: [-5.00529232e-05 -1.92836777e-03 -1.51156011e-05 -1.85453815e-05
  1.53836051e-03]
RMSE: [  4.57831544   3.6244987  102.44842223  89.34917236   5.8965936 ]

ElasticNet - Regressão:
R²: [ 2.16155403e-04 -1.56086586e-03 -1.26907764e-05 -1.85629859e-05
  1.69829591e-03]
RMSE: [  4.57770604   3.62383392 102.44829802  89.34917315   5.89612132]



In [42]:
result_linear = avaliar_modelos_com_kfold(arquivo_w2v_X, arquivo_y, "Word2Vec")



Linear - Regressão:
R²: [ 2.24055823e-01 -4.38151020e-01 -8.42709938e-05  2.31212900e-03
 -1.34978175e-01]
RMSE: [  4.03283155   4.34242032 102.45196454  89.24499164   6.28679444]

Ridge - Regressão:
R²: [ 2.00677258e-01 -4.37937965e-01 -8.62240059e-05  1.35355887e-03
 -1.35031378e-01]
RMSE: [  4.09313367   4.34209866 102.45206458  89.28785427   6.28694179]

Lasso - Regressão:
R²: [-5.00529232e-05 -1.92836777e-03 -1.51156011e-05 -1.85453815e-05
  1.53836051e-03]
RMSE: [  4.57831544   3.6244987  102.44842223  89.34917236   5.8965936 ]

ElasticNet - Regressão:
R²: [ 2.16155403e-04 -1.56086586e-03 -1.26907764e-05 -1.85629859e-05
  1.69829591e-03]
RMSE: [  4.57770604   3.62383392 102.44829802  89.34917315   5.89612132]



#### Regressão Polinomial

In [None]:
arquivo_bow_poly = arquivo_bow.copy()
arquivo_tfidf_poly = arquivo_tfidf.copy()
arquivo_w2v_poly = arquivo_w2v.copy()

for col in arquivo_bow_poly.columns:
    arquivo_bow_poly[f'{col}_squared'] = arquivo_bow_poly[col] ** 2
for col in arquivo_tfidf_poly.columns:
    arquivo_tfidf_poly[f'{col}_squared'] = arquivo_tfidf_poly[col] ** 2
for col in arquivo_w2v_poly.columns:
    arquivo_w2v_poly[f'{col}_squared'] = arquivo_w2v_poly[col] ** 2

In [43]:
result_linear = avaliar_modelos_com_kfold(arquivo_bow_X, arquivo_y, "BoW")



Linear - Regressão:
R²: [ 3.75652371e-01 -1.83186713e-01 -1.70137938e-04  2.96807576e-03
 -6.62456558e-02]
RMSE: [  3.61749614   3.93873039 102.4563627   89.215649     6.09346249]

Ridge - Regressão:
R²: [ 3.52603710e-01 -1.84942401e-01 -1.73198729e-04  2.01837409e-03
 -6.72882522e-02]
RMSE: [  3.68366346   3.94165158 102.45651947  89.25812913   6.09644092]

Lasso - Regressão:
R²: [-5.00529232e-05 -1.92836777e-03 -1.51156011e-05 -1.85453815e-05
  1.53836051e-03]
RMSE: [  4.57831544   3.6244987  102.44842223  89.34917236   5.8965936 ]

ElasticNet - Regressão:
R²: [ 2.16155403e-04 -1.56086586e-03 -1.26907764e-05 -1.85629859e-05
  1.69829591e-03]
RMSE: [  4.57770604   3.62383392 102.44829802  89.34917315   5.89612132]



In [44]:
result_linear = avaliar_modelos_com_kfold(arquivo_tfidf_X, arquivo_y, "TF-IDF")



Linear - Regressão:
R²: [ 1.24282159e-01 -5.32549714e-01 -3.43757084e-04  2.46069102e-03
 -1.89761831e-01]
RMSE: [  4.28427108   4.48267133 102.46525499  89.23834682   6.43673339]

Ridge - Regressão:
R²: [ 1.06676306e-01 -5.34845590e-01 -3.48237142e-04  1.53316822e-03
 -1.90471161e-01]
RMSE: [  4.32712329   4.48602777 102.46548444  89.27982457   6.43865188]

Lasso - Regressão:
R²: [-5.00529232e-05 -1.92836777e-03 -1.51156011e-05 -1.85453815e-05
  1.53836051e-03]
RMSE: [  4.57831544   3.6244987  102.44842223  89.34917236   5.8965936 ]

ElasticNet - Regressão:
R²: [ 2.16155403e-04 -1.56086586e-03 -1.26907764e-05 -1.85629859e-05
  1.69829591e-03]
RMSE: [  4.57770604   3.62383392 102.44829802  89.34917315   5.89612132]



In [45]:
result_linear = avaliar_modelos_com_kfold(arquivo_w2v_X, arquivo_y, "Word2Vec")



Linear - Regressão:
R²: [ 2.24055823e-01 -4.38151020e-01 -8.42709938e-05  2.31212900e-03
 -1.34978175e-01]
RMSE: [  4.03283155   4.34242032 102.45196454  89.24499164   6.28679444]

Ridge - Regressão:
R²: [ 2.00677258e-01 -4.37937965e-01 -8.62240059e-05  1.35355887e-03
 -1.35031378e-01]
RMSE: [  4.09313367   4.34209866 102.45206458  89.28785427   6.28694179]

Lasso - Regressão:
R²: [-5.00529232e-05 -1.92836777e-03 -1.51156011e-05 -1.85453815e-05
  1.53836051e-03]
RMSE: [  4.57831544   3.6244987  102.44842223  89.34917236   5.8965936 ]

ElasticNet - Regressão:
R²: [ 2.16155403e-04 -1.56086586e-03 -1.26907764e-05 -1.85629859e-05
  1.69829591e-03]
RMSE: [  4.57770604   3.62383392 102.44829802  89.34917315   5.89612132]

