# Búsqueda de noticias relacionadas a cooperativas

In [1]:
import os
import pandas as pd
import plotly.express as px
import plotly.io as pio

from dotenv import load_dotenv
from glob import glob
from pathlib import Path
from rich.pretty import pprint

In [2]:
# Populate the process environment (python-dotenv) before reading settings.
load_dotenv()

# Root directory used by later cells to build every data path.
BASE_DIR = os.environ.get("BASE_DIR")
# API token taken from the environment so it is never hardcoded here.
BEARER_TOKEN = os.environ.get("BEARER_TOKEN")

# Fail fast: an unset BASE_DIR would otherwise be formatted into paths as the
# literal string "None" and only surface later as a confusing file-not-found.
if not BASE_DIR:
    raise EnvironmentError(
        "BASE_DIR is not set; define it in the environment or in a .env file"
    )

In [3]:
# pandas display options for the tables rendered in this notebook.
pd.set_option("display.max_colwidth", 300)
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 50)
pd.set_option("display.precision", 2)
# NOTE(review): this formatter (3 decimals) presumably takes precedence over
# display.precision above for float rendering; confirm which one is wanted.
pd.set_option("display.float_format",  "{:,.3f}".format)

# Plotly defaults: white template and 2x scale for static image export.
pio.templates.default = "plotly_white"
pio.kaleido.scope.default_scale = 2

# Gruvbox palette for categorical plots.
# Fixed two transposed hex codes so they match the Gruvbox palette:
# "#fb4943" -> "#fb4934" (bright red) and "#4d7b58" -> "#427b58" (faded aqua).
gruvbox_colors = [
    "#fabd2f", "#b8bb26", "#458588", "#fe8019", "#b16286", "#fb4934",
    "#689d6a", "#d79921", "#98971a", "#83a598", "#d65d0e", "#d3869b",
    "#cc241d", "#8ec07c", "#b57614", "#79740e", "#076678", "#af3a03",
    "#8f3f71", "#9d0006", "#427b58", "#fbf1c7", "#928374", "#282828",
]

## Tweets database

In [4]:
# Load one sample DTM file and one sample corpus file to inspect their layout.
# Note that the two files carry different "(…, …)" stamps in their names.
dtm = pd.read_feather(f"{BASE_DIR}/data/04_feature/dtm/dtm-(2023, 21).feather")
corpus = pd.read_feather(f"{BASE_DIR}/data/05_model_input/corpus/corpus-(2022, 35).feather")

In [5]:
# Preview the sample DTM frame: an "id." column plus one column per lemma.
dtm.head()

lemma,id.,aa,aap,aar,aarón,aas,abad,abajo,abancay,abandona,abandonado,abandonar,abandono,abanico,abanto,abastecer,abastecido,abatir,abby,abbyardilesp,abdul,abeja,abel,abiertamente,abierto,...,éxito,ícono,ídolo,índice,ínt,íntimo,íntir,ñañitar,ñoco,ñusta,óptico,órden,órgano,óscar,óseo,últim,últimamente,últimominuto,únetir,único,úrsular,‍‍,→,↓,🖩
0,1657898765129248771,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1657898774847430657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1657898799560286208,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1657899767853088769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1657899963018354688,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Preview the sample corpus frame: tweet id, timestamp, newspaper, raw text and cleaned text.
corpus.head()

Unnamed: 0,index,id,created_at,newspaper,text,corpus
0,0,1564039479391838209,2022-08-28 23:57:24+00:00,elcomercio_peru,Venezuela y Colombia retoman relaciones diplomáticas rotas hace tres años https://t.co/L6uVA6LcEE,venezuela colombia retoman relaciones diplomáticas rotas hace tres años
1,1,1564037610393280512,2022-08-28 23:49:59+00:00,elcomercio_peru,“Me dijeron que estaba llevando vergüenza a la universidad”: la profesora obligada a renunciar por postear fotos en bikini https://t.co/zAe98GI7W2,me dijeron que estaba llevando vergüenza la universidad la profesora obligada renunciar por postear fotos en bikini
2,2,1564032331706470401,2022-08-28 23:29:00+00:00,elcomercio_peru,AMLO afirma que familias ya aceptaron plan de rescate de 10 mineros https://t.co/dG3VJXWgNa,amlo afirma que familias ya aceptaron plan de rescate de mineros
3,3,1564028601053347843,2022-08-28 23:14:11+00:00,elcomercio_peru,Zelensky: los ocupantes rusos sentirán las consecuencias de “futuras acciones” https://t.co/mNJTLz0SS7,zelensky los ocupantes rusos sentirán las consecuencias de futuras acciones
4,4,1564023766937731073,2022-08-28 22:54:58+00:00,elcomercio_peru,Autoridades confirman transmisión comunitaria de viruela del mono en Panamá https://t.co/EBFcdrHz4Y,autoridades confirman transmisión comunitaria de viruela del mono en panamá


In [7]:
# Folder holding one DTM feather file per period, named "dtm-<stamp>.feather".
dtm_folder = Path(f"{BASE_DIR}/data/04_feature/dtm")

# Keep only the DTM feather files: stray entries such as ".DS_Store" have no
# "-" in their stem and would break the timestamp parsing downstream.
# Sorting makes the processing order deterministic across runs (the order is
# lexicographic on the path, not necessarily chronological).
dtm_file_list = sorted(
    item for item in dtm_folder.glob("dtm-*.feather") if item.is_file()
)

In [8]:
# Maps each DTM file's stamp to the ids of the tweets whose DTM row has a
# positive value in at least one column whose name contains "cooperativa".
selected_tweets = {}

for dtm_file in dtm_file_list:
    # Stems look like "dtm-(2023, 21)"; split only on the first dash so the
    # whole remainder is kept as the stamp used to locate the corpus file.
    timestamp = dtm_file.stem.split("-", 1)[1]

    # Loop-local frame: does not overwrite the notebook-level `dtm` previewed
    # earlier, and avoids in-place mutation.
    file_dtm = pd.read_feather(dtm_file).set_index("id.")

    # Columns whose name matches the regex "cooperativa"
    cooperativa_columns = file_dtm.filter(regex="cooperativa")

    # Ids of the rows with a positive value in any of those columns
    has_match = (cooperativa_columns > 0).any(axis=1)
    matching_ids = cooperativa_columns.loc[has_match].index

    if len(matching_ids) > 0:
        selected_tweets[timestamp] = matching_ids.tolist()

In [9]:
# Pretty-print the stamp -> tweet ids mapping built above.
pprint(selected_tweets)

In [10]:
# For every stamp with selected tweets, load the matching corpus file and keep
# only the rows whose "id" is among the selected ids.
corpus_list = []

for timestamp, tweet_ids in selected_tweets.items():
    # Loop-local frame: does not overwrite the notebook-level `corpus`
    # previewed earlier.
    file_corpus = pd.read_feather(f"{BASE_DIR}/data/05_model_input/corpus/corpus-{timestamp}.feather")

    corpus_list.append(file_corpus.loc[file_corpus["id"].isin(tweet_ids)])

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# raise the same exception type with a message that explains the cause.
if not corpus_list:
    raise ValueError("No tweets were selected, so there is nothing to concatenate")

filtered_corpus = pd.concat(corpus_list)

In [11]:
# Drop the leftover "index" column. Rebinding (instead of inplace=True) with
# errors="ignore" keeps this cell idempotent: re-running it no longer raises
# KeyError once the column is already gone.
filtered_corpus = filtered_corpus.drop(columns="index", errors="ignore")

In [12]:
# Summarize columns, dtypes and non-null counts of the filtered tweets.
filtered_corpus.info()

<class 'pandas.core.frame.DataFrame'>
Index: 48 entries, 9859 to 4827
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype              
---  ------      --------------  -----              
 0   id          48 non-null     object             
 1   created_at  48 non-null     datetime64[ns, UTC]
 2   newspaper   48 non-null     object             
 3   text        48 non-null     object             
 4   corpus      48 non-null     object             
dtypes: datetime64[ns, UTC](1), object(4)
memory usage: 2.2+ KB


In [13]:
# Display the filtered tweets (row labels come from the per-file corpus frames, so they are not unique).
filtered_corpus

Unnamed: 0,id,created_at,newspaper,text,corpus
9859,1551888491176464385,2022-07-26 11:13:43+00:00,Gestionpe,Diez cooperativas agrarias para desarrollar nuevos cultivos se crearían este año https://t.co/XBm9o0J9vP,diez cooperativas agrarias para desarrollar nuevos cultivos se crearían este año
5618,1599777731868254208,2022-12-05 14:48:28+00:00,Gestionpe,#PlusG | Los planes de las cooperativas para atraer más ahorro de personas en 2023 https://t.co/ujaRaSDDtP https://t.co/xeUKzYo028,los planes de las cooperativas para atraer más ahorro de personas en
5652,1599746342762737668,2022-12-05 12:43:44+00:00,Gestionpe,#PlusG | Los planes de las cooperativas para atraer más ahorro de personas en 2023 ► https://t.co/mRsVxKa4bS https://t.co/sGHc9oAjph,los planes de las cooperativas para atraer más ahorro de personas en
5666,1599722596303609857,2022-12-05 11:09:23+00:00,Gestionpe,Los planes de las cooperativas para atraer más ahorro de personas en 2023 https://t.co/fvKy86mhQD,los planes de las cooperativas para atraer más ahorro de personas en
6269,1600109915514830850,2022-12-06 12:48:27+00:00,diariocorreo,Ahorristas de cooperativas denuncian a consejero José Luis Hancco https://t.co/xNyM5bCW2y,ahorristas de cooperativas denuncian consejero josé luis hancco
2989,1608967808167149573,2022-12-30 23:26:33+00:00,Gestionpe,#LoMásLeído2022 ✔ Estas son las 20 cooperativas de ahorro y crédito disueltas por SBS https://t.co/4yGTJauxKU https://t.co/PaIzYZaVWp,estas son las cooperativas de ahorro crédito disueltas por sbs
1605,1574823094014939142,2022-09-27 18:07:39+00:00,larepublica_pe,RT @Politica_LR: Ministra Ocampo se compromete a impulsar reglamento de ley de Cooperativas Agrarias \n\nDestacó la importancia del cooperati…,politicalr ministra ocampo se compromete impulsar reglamento de ley de cooperativas agrarias destacó la importancia del cooperati
3642,1642659287687208960,2023-04-02 22:44:27+00:00,Gestionpe,"SBS dispone disolución de cuatro cooperativas, ¿cuáles fueron los motivos? \n\nLa Superintendencia de Banca, Seguros y AFP (SBS) designó a administradores temporales. Los detalles a continuación.\n\nhttps://t.co/i5Hi2cbtyb",sbs dispone disolución de cuatro cooperativas cuáles fueron los motivos la superintendencia de banca seguros afp sbs designó administradores temporales los detalles continuación
4921,1640975963071684611,2023-03-29 07:15:32+00:00,diariocorreo,Policía llega a calmar bronca en Cooperativa de Huancavelica entre delegados y directivos https://t.co/5nMSRuRVwG,policía llega calmar bronca en cooperativa de huancavelica entre delegados directivos
6172,1641854116455669762,2023-03-31 17:25:00+00:00,DiarioElPeruano,La @SBSPERU declara la disolución de cuatro cooperativas de ahorro y crédito. Conoce aquí cuáles fueron las causales👉https://t.co/FRIhKnOWIE https://t.co/MB71reiQkD,la sbsperu declara la disolución de cuatro cooperativas de ahorro crédito conoce aquí cuáles fueron las causales


In [14]:
# Show the tweets with repeated (corpus, newspaper) pairs removed.
# NOTE(review): the result is only displayed, not assigned, so `filtered_corpus`
# still contains the duplicates — confirm whether the deduplicated frame is the
# one that should be exported.
filtered_corpus.drop_duplicates(subset=["corpus", "newspaper"])

Unnamed: 0,id,created_at,newspaper,text,corpus
9859,1551888491176464385,2022-07-26 11:13:43+00:00,Gestionpe,Diez cooperativas agrarias para desarrollar nuevos cultivos se crearían este año https://t.co/XBm9o0J9vP,diez cooperativas agrarias para desarrollar nuevos cultivos se crearían este año
5618,1599777731868254208,2022-12-05 14:48:28+00:00,Gestionpe,#PlusG | Los planes de las cooperativas para atraer más ahorro de personas en 2023 https://t.co/ujaRaSDDtP https://t.co/xeUKzYo028,los planes de las cooperativas para atraer más ahorro de personas en
5666,1599722596303609857,2022-12-05 11:09:23+00:00,Gestionpe,Los planes de las cooperativas para atraer más ahorro de personas en 2023 https://t.co/fvKy86mhQD,los planes de las cooperativas para atraer más ahorro de personas en
6269,1600109915514830850,2022-12-06 12:48:27+00:00,diariocorreo,Ahorristas de cooperativas denuncian a consejero José Luis Hancco https://t.co/xNyM5bCW2y,ahorristas de cooperativas denuncian consejero josé luis hancco
2989,1608967808167149573,2022-12-30 23:26:33+00:00,Gestionpe,#LoMásLeído2022 ✔ Estas son las 20 cooperativas de ahorro y crédito disueltas por SBS https://t.co/4yGTJauxKU https://t.co/PaIzYZaVWp,estas son las cooperativas de ahorro crédito disueltas por sbs
1605,1574823094014939142,2022-09-27 18:07:39+00:00,larepublica_pe,RT @Politica_LR: Ministra Ocampo se compromete a impulsar reglamento de ley de Cooperativas Agrarias \n\nDestacó la importancia del cooperati…,politicalr ministra ocampo se compromete impulsar reglamento de ley de cooperativas agrarias destacó la importancia del cooperati
3642,1642659287687208960,2023-04-02 22:44:27+00:00,Gestionpe,"SBS dispone disolución de cuatro cooperativas, ¿cuáles fueron los motivos? \n\nLa Superintendencia de Banca, Seguros y AFP (SBS) designó a administradores temporales. Los detalles a continuación.\n\nhttps://t.co/i5Hi2cbtyb",sbs dispone disolución de cuatro cooperativas cuáles fueron los motivos la superintendencia de banca seguros afp sbs designó administradores temporales los detalles continuación
4921,1640975963071684611,2023-03-29 07:15:32+00:00,diariocorreo,Policía llega a calmar bronca en Cooperativa de Huancavelica entre delegados y directivos https://t.co/5nMSRuRVwG,policía llega calmar bronca en cooperativa de huancavelica entre delegados directivos
6172,1641854116455669762,2023-03-31 17:25:00+00:00,DiarioElPeruano,La @SBSPERU declara la disolución de cuatro cooperativas de ahorro y crédito. Conoce aquí cuáles fueron las causales👉https://t.co/FRIhKnOWIE https://t.co/MB71reiQkD,la sbsperu declara la disolución de cuatro cooperativas de ahorro crédito conoce aquí cuáles fueron las causales
4549,1643305720022982672,2023-04-04 17:33:09+00:00,diariocorreo,Arequipa: 400 socios preocupados por disolución de cooperativa Alto Selva Alegre https://t.co/KrVl6YjNOM,arequipa socios preocupados por disolución de cooperativa alto selva alegre


In [18]:
# Export the filtered tweets as CSV under the project's data directory instead
# of a hardcoded absolute user path, so the notebook runs on other machines.
# NOTE(review): assumes BASE_DIR is the project root that the previous
# hardcoded path pointed into — confirm before relying on the output location.
filtered_corpus.to_csv(f"{BASE_DIR}/data/tweets.csv")