# Análise de Dados do Twitter

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

![separator1](https://i.imgur.com/ZUWYTii.png)

# Carregar dados

In [2]:
# Load the raw capture file. As the preview below shows, each row has a
# time-of-day string ("hora") and a stringified list of (label, count)
# pairs ("twitts").
df = pd.read_csv('data/Bolsonaro1.csv')

In [3]:
df.head()

Unnamed: 0,hora,twitts
0,03:23:20,"[('0', 472), ('1', 29)]"
1,03:23:59,"[('0', 465), ('1', 36)]"
2,03:24:39,"[('0', 463), ('1', 38)]"
3,03:25:16,"[('0', 470), ('1', 31)]"
4,03:25:52,"[('0', 470), ('1', 31)]"


In [4]:
df.shape

(51, 2)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   hora    51 non-null     object
 1   twitts  51 non-null     object
dtypes: object(2)
memory usage: 944.0+ bytes


In [6]:
df.dtypes

hora      object
twitts    object
dtype: object

![separator1](https://i.imgur.com/ZUWYTii.png)

# Transformar a coluna hora em "datetime"

In [7]:
# Preview the "hora" column parsed as datetimes (df itself is not modified here).
# NOTE(review): the source strings hold only a time of day, yet the parsed
# values carry the date 2021-01-30 - presumably the day the notebook was run.
# Confirm whether that date is meaningful, or pass an explicit format.
pd.to_datetime(df['hora']).head()

0   2021-01-30 03:23:20
1   2021-01-30 03:23:59
2   2021-01-30 03:24:39
3   2021-01-30 03:25:16
4   2021-01-30 03:25:52
Name: hora, dtype: datetime64[ns]

In [8]:
# Overwrite "hora" with its datetime-parsed version.
# NOTE(review): the input has no date part, so the date in the result is
# filled in by the parser - presumably the run date; confirm before relying on it.
df['hora'] = pd.to_datetime(df['hora'])

In [9]:
df.head()

Unnamed: 0,hora,twitts
0,2021-01-30 03:23:20,"[('0', 472), ('1', 29)]"
1,2021-01-30 03:23:59,"[('0', 465), ('1', 36)]"
2,2021-01-30 03:24:39,"[('0', 463), ('1', 38)]"
3,2021-01-30 03:25:16,"[('0', 470), ('1', 31)]"
4,2021-01-30 03:25:52,"[('0', 470), ('1', 31)]"


In [10]:
df.dtypes

hora      datetime64[ns]
twitts            object
dtype: object

![separator1](https://i.imgur.com/ZUWYTii.png)

# Colocar a coluna hora como índice

In [11]:
# Make "hora" the index of df. Reassignment is used instead of inplace=True so
# this cell follows the same `df = ...` pattern as the other transformation
# cells and never mutates a frame that an earlier cell already displayed.
df = df.set_index('hora')

In [12]:
df.head()

Unnamed: 0_level_0,twitts
hora,Unnamed: 1_level_1
2021-01-30 03:23:20,"[('0', 472), ('1', 29)]"
2021-01-30 03:23:59,"[('0', 465), ('1', 36)]"
2021-01-30 03:24:39,"[('0', 463), ('1', 38)]"
2021-01-30 03:25:16,"[('0', 470), ('1', 31)]"
2021-01-30 03:25:52,"[('0', 470), ('1', 31)]"


In [13]:
df.columns

Index(['twitts'], dtype='object')

![separator1](https://i.imgur.com/ZUWYTii.png)

# Separar e limpar sujeira da coluna twitts 

In [14]:
# Preview: split the "twitts" strings on commas. A string such as
# "[('0', 472), ('1', 29)]" breaks into four fragments (label, count, label,
# count), with the brackets, parentheses and quotes still attached.
df['twitts'].str.split(',', expand=True)

Unnamed: 0_level_0,0,1,2,3
hora,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-30 03:23:20,[('0',472),('1',29)]
2021-01-30 03:23:59,[('0',465),('1',36)]
2021-01-30 03:24:39,[('0',463),('1',38)]
2021-01-30 03:25:16,[('0',470),('1',31)]
2021-01-30 03:25:52,[('0',470),('1',31)]
2021-01-30 03:26:26,[('0',457),('1',44)]
2021-01-30 03:27:04,[('0',464),('1',37)]
2021-01-30 03:27:42,[('0',474),('1',27)]
2021-01-30 03:28:20,[('0',471),('1',30)]
2021-01-30 03:28:57,[('0',462),('1',39)]


In [16]:
# Replace df with the four split columns. The original "twitts" column is
# gone after this cell; the "hora" index is kept.
df = df['twitts'].str.split(',', expand=True)

In [17]:
# Rename the positional columns 0-3 to zero/um/dois/tres
# (Portuguese for zero/one/two/three).
df.columns = ['zero', 'um', 'dois', 'tres']

In [18]:
df.head()

Unnamed: 0_level_0,zero,um,dois,tres
hora,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-30 03:23:20,[('0',472),('1',29)]
2021-01-30 03:23:59,[('0',465),('1',36)]
2021-01-30 03:24:39,[('0',463),('1',38)]
2021-01-30 03:25:16,[('0',470),('1',31)]
2021-01-30 03:25:52,[('0',470),('1',31)]


In [20]:
# Helper that strips the leftover closing brackets from a split fragment.
def corrigir_nomes(nome):
    """Remove every ")" and "]" character from a text fragment.

    Used to clean the count fragments produced by splitting the stringified
    list of pairs, e.g. " 472)" -> " 472" and " 29)]" -> " 29".

    Parameters
    ----------
    nome : str
        Fragment to clean. Non-string values (such as the None that
        ``str.split(expand=True)`` pads short rows with) are returned
        unchanged instead of raising AttributeError.

    Returns
    -------
    str
        ``nome`` without any ")" or "]" characters. Surrounding whitespace
        is left untouched.
    """
    # Missing fragments have nothing to clean; pass them through untouched.
    if not isinstance(nome, str):
        return nome
    return nome.replace(')', '').replace(']', '')

In [21]:
# Clean column "um" (the first count): remove the trailing ")".
df['um'] = df['um'].apply(corrigir_nomes)


In [22]:
df.head()

Unnamed: 0_level_0,zero,um,dois,tres
hora,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-30 03:23:20,[('0',472,('1',29)]
2021-01-30 03:23:59,[('0',465,('1',36)]
2021-01-30 03:24:39,[('0',463,('1',38)]
2021-01-30 03:25:16,[('0',470,('1',31)]
2021-01-30 03:25:52,[('0',470,('1',31)]


In [23]:
# Clean column "tres" (the second count): remove the trailing ")]".
df['tres'] = df['tres'].apply(corrigir_nomes)

In [26]:
df.head()

Unnamed: 0_level_0,zero,um,dois,tres
hora,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-30 03:23:20,[('0',472,('1',29
2021-01-30 03:23:59,[('0',465,('1',36
2021-01-30 03:24:39,[('0',463,('1',38
2021-01-30 03:25:16,[('0',470,('1',31
2021-01-30 03:25:52,[('0',470,('1',31


In [27]:
# Drop column "zero": it only holds the label fragment of the first pair,
# not a count.
df = df.drop(columns=['zero'])

In [28]:
df.head()

Unnamed: 0_level_0,um,dois,tres
hora,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-30 03:23:20,472,('1',29
2021-01-30 03:23:59,465,('1',36
2021-01-30 03:24:39,463,('1',38
2021-01-30 03:25:16,470,('1',31
2021-01-30 03:25:52,470,('1',31


In [29]:
# Drop column "dois": it only holds the label fragment of the second pair,
# not a count.
df = df.drop(columns=['dois'])

In [30]:
df.head()

Unnamed: 0_level_0,um,tres
hora,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-30 03:23:20,472,29
2021-01-30 03:23:59,465,36
2021-01-30 03:24:39,463,38
2021-01-30 03:25:16,470,31
2021-01-30 03:25:52,470,31


![separator1](https://i.imgur.com/ZUWYTii.png)

# Transformar as colunas em numérico 

In [31]:
df.columns

Index(['um', 'tres'], dtype='object')

In [32]:
df.dtypes

um      object
tres    object
dtype: object

In [36]:
# Preview column "um" converted to a numeric dtype (df is not modified here).
pd.to_numeric(df['um']).head()

hora
2021-01-30 03:23:20    472
2021-01-30 03:23:59    465
2021-01-30 03:24:39    463
2021-01-30 03:25:16    470
2021-01-30 03:25:52    470
Name: um, dtype: int64

In [38]:
# Store the numeric version of "um" back in df.
df['um'] = pd.to_numeric(df['um'])

In [39]:
df.dtypes

um       int64
tres    object
dtype: object

In [40]:
# Preview column "tres" converted to a numeric dtype (df is not modified here).
pd.to_numeric(df['tres']).head()

hora
2021-01-30 03:23:20    29
2021-01-30 03:23:59    36
2021-01-30 03:24:39    38
2021-01-30 03:25:16    31
2021-01-30 03:25:52    31
Name: tres, dtype: int64

In [41]:
# Store the numeric version of "tres" back in df.
df['tres'] = pd.to_numeric(df['tres'])

In [42]:
df.dtypes

um      int64
tres    int64
dtype: object

In [46]:
df

Unnamed: 0_level_0,um,tres
hora,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-30 03:23:20,472,29
2021-01-30 03:23:59,465,36
2021-01-30 03:24:39,463,38
2021-01-30 03:25:16,470,31
2021-01-30 03:25:52,470,31
2021-01-30 03:26:26,457,44
2021-01-30 03:27:04,464,37
2021-01-30 03:27:42,474,27
2021-01-30 03:28:20,471,30
2021-01-30 03:28:57,462,39


![separator1](https://i.imgur.com/ZUWYTii.png)

# Visualização