# MDMS - PEC1 - Adrián José Zapater Reig

## Extracción de Nodos y Aristas

In [1]:
import csv
import json
import os

import pandas as pd

In [2]:
# Contenedores de los datos recogidos de la API de twitter.

class BaseUser(object):
    """Minimal user record made of an ``id`` and a ``username``.

    Instances are hashable and compare equal when both fields match, so
    they can be deduplicated by storing them in a ``set``.
    """

    def __init__(self, id, username):
        self.id = id
        self.username = username

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return '{{ id={}, username={} }}'.format(self.id, self.username)

    def __hash__(self):
        # Hash exactly the fields compared in __eq__ so equal users collide.
        return hash((self.id, self.username))

    def __eq__(self, other):
        if not isinstance(other, type(self)): return NotImplemented
        return self.id == other.id and self.username == other.username

    @staticmethod
    def from_json(json):
        """Build a list of ``BaseUser`` from a decoded API response.

        Declared as a static method so it can be called both on the class
        and on an instance (without the decorator, an instance call would
        pass ``self`` as the payload and fail).

        Args:
            json: mapping whose optional ``'data'`` entry is an iterable of
                mappings, each with ``'id'`` and ``'username'`` keys.

        Returns:
            A list with one ``BaseUser`` per entry; empty when ``'data'``
            is missing.

        Raises:
            KeyError: if an entry lacks ``'id'`` or ``'username'``.
        """
        return [BaseUser(user['id'], user['username']) for user in json.get('data', {})]
        

class FullUser(BaseUser):
    """User record extended with location, public metrics and verified flag.

    Hashing and equality are inherited from ``BaseUser`` and therefore only
    consider ``id`` and ``username``.
    """

    def __init__(self, id, username, location, num_followers, num_following, num_tweets, is_verfied):
        super().__init__(id,username)
        self.location = location
        self.followers_count = num_followers
        self.following_count = num_following
        self.tweet_count = num_tweets
        # Accept real booleans as well as their textual form ('true'/'True');
        # anything else (including None) is stored as False.
        self.verified = str(is_verfied).lower() == 'true'

    def __repr__(self):
        return '{{ id={}, username={}, location={}, followers={}, following={}, tweets={}, verified={} }}'.format(self.id, self.username, self.location, self.followers_count, self.following_count, self.tweet_count, self.verified)

    @staticmethod
    def from_json(json):
        """Build a list of ``FullUser`` from a decoded API response.

        Declared as a static method so it can be called both on the class
        and on an instance.

        Args:
            json: mapping whose optional ``'data'`` entry is an iterable of
                user mappings. Every field is read with ``.get`` so missing
                keys become ``None``; metrics are read from the nested
                ``'public_metrics'`` mapping.

        Returns:
            A list with one ``FullUser`` per entry; empty when ``'data'``
            is missing.
        """
        users = []
        for user in json.get('data', {}):
            pub_metrics = user.get('public_metrics', {})
            users.append(FullUser(
                user.get('id'),
                user.get('username'),
                user.get('location'),
                pub_metrics.get('followers_count'),
                pub_metrics.get('following_count'),
                pub_metrics.get('tweet_count'),
                user.get('verified'),
            ))
        return users
        

In [3]:
class Relation(object):
    """Directed link from ``src`` to ``dst``.

    Two relations are equal (and hash alike) when both endpoints match,
    which lets a ``set`` discard duplicated links.
    """

    def __init__(self, src, dst):
        self.src, self.dst = src, dst

    def __repr__(self):
        template = '{{ source={}, destination={} }}'
        return template.format(self.src, self.dst)

    def __str__(self):
        return repr(self)

    def __hash__(self):
        endpoints = (self.src, self.dst)
        return hash(endpoints)

    def __eq__(self, other):
        if isinstance(other, type(self)):
            same_source = self.src == other.src
            return same_source and self.dst == other.dst
        # Let Python try the reflected comparison for foreign types.
        return NotImplemented
        

In [4]:
# Clase que gestiona la autenticacion contra twitter y las llamadas.

import requests

class TwitterConnection:
    """Keeps the bearer-token headers and issues HTTP calls through ``requests``."""

    def __init__(self, bearer_token, content_type='application/json; charset=utf-8'):
        self.__headers = {
            'Authorization':'Bearer ' + bearer_token,
            'content-type': content_type
        }

    def set_content_type(self, content_type):
        """Replace the ``content-type`` header sent with later requests."""
        self.__headers['content-type'] = content_type

    def get_headers(self):
        """Return the live headers dict (not a copy)."""
        return self.__headers

    def request(self, url, op='GET', payload=None):
        """Perform an HTTP request and return the decoded JSON body.

        Args:
            url: full URL to call.
            op: HTTP method name passed to ``requests.request``.
            payload: request body passed as ``data``; ``None`` (the
                default) sends an empty dict, as the previous ``{}``
                default did.

        Returns:
            The parsed JSON response when the status code is 200;
            an empty dict for any other status code.
        """
        # A None sentinel avoids sharing one mutable dict across every call.
        if payload is None:
            payload = {}
        print('URL:' + url)
        response = requests.request(op, url, headers=self.__headers, data=payload)
        print('response code: {}'.format(response.status_code))
        if response.status_code != 200:
            # Best-effort: non-200 answers are reported above and yield no data.
            return {}
        return json.loads(response.text)


In [5]:
# Extractor de Twitter desde la API V2.
class TwitterV2ApiExtractor:
    """Builds Twitter API v2 user-lookup URLs and runs them through a connection.

    ``connection`` only needs a ``request(url)`` method; whatever it
    returns is handed back to the caller unchanged.
    """

    # Single immutable default shared by both lookups (replaces two
    # duplicated mutable list defaults).
    DEFAULT_USER_FIELDS = ('public_metrics', 'username', 'verified', 'location')

    def __init__(self, connection):
        self.__base_url='https://api.twitter.com/2/'
        self.__connection = connection

    def get_user_by_username(self, *args, return_fields=DEFAULT_USER_FIELDS):
        """Look up users by username.

        Args:
            *args: usernames, joined with commas into the ``usernames``
                query parameter.
            return_fields: iterable of strings joined into ``user.fields``.

        Returns:
            Whatever ``connection.request`` returns for the built URL.
        """
        url = self.__base_url + 'users/by?usernames={}&user.fields={}'.format(','.join(args), ','.join(return_fields))
        return self.__connection.request(url)

    def get_following_by_user_id(self, user_id, max_results=1000, return_fields=DEFAULT_USER_FIELDS, pagination_token=None):
        """Fetch one page of the accounts followed by ``user_id``.

        Args:
            user_id: id placed in the URL path.
            max_results: value of the ``max_results`` query parameter.
            return_fields: iterable of strings joined into ``user.fields``.
            pagination_token: optional token of the page to request. When
                ``None`` (default) the parameter is omitted and the URL is
                identical to the one built before this argument existed.

        Returns:
            Whatever ``connection.request`` returns for the built URL.
        """
        url = self.__base_url + 'users/{}/following?user.fields={}&max_results={}'.format(user_id, ','.join(return_fields), max_results)
        if pagination_token is not None:
            # Without this a caller could never retrieve anything past the first page.
            url += '&pagination_token={}'.format(pagination_token)
        return self.__connection.request(url)
        

## Extracción

In [6]:
# Seed usernames the extraction starts from.
usernames = ['Charles_Leclerc','PierreGASLY','alo_oficial','Carlossainz55','GeorgeRussell63','LewisHamilton','danielricciardo','LandoNorris', 'SchumacherMick']

# Bearer token used to authenticate against Twitter. Each user supplies their
# own through the TWITTER_BEARER_TOKEN environment variable, so the secret is
# never saved inside the notebook. Falls back to '' when the variable is unset.
token = os.environ.get('TWITTER_BEARER_TOKEN', '')

In [7]:
# Create the authenticated connection that the extractor will use.
conn = TwitterConnection(bearer_token=token)

In [8]:
# Extractor bound to the connection created above.
ext = TwitterV2ApiExtractor(connection=conn)

In [9]:
# Primary users: request the profile of every seed username in a single call
# and parse the response into FullUser objects.
primary_users_json = ext.get_user_by_username(*usernames)
primary_users = FullUser.from_json(primary_users_json)
# Bare last expression so the notebook displays the parsed users.
primary_users

URL:https://api.twitter.com/2/users/by?usernames=Charles_Leclerc,PierreGASLY,alo_oficial,Carlossainz55,GeorgeRussell63,LewisHamilton,danielricciardo,LandoNorris,SchumacherMick&user.fields=public_metrics,username,verified,location
response code: 200


[{ id=262230432, username=Charles_Leclerc, location=Monaco, followers=1255358, following=167, tweets=2008, verified=True },
 { id=537951506, username=PierreGASLY, location=Rouen, France, followers=732789, following=506, tweets=2670, verified=True },
 { id=507672047, username=alo_oficial, location=instagram: fernandoalo_oficial, followers=2857858, following=133, tweets=8465, verified=True },
 { id=353786894, username=Carlossainz55, location=planet earth, followers=1170570, following=338, tweets=8242, verified=True },
 { id=394892872, username=GeorgeRussell63, location=None, followers=856888, following=344, tweets=2776, verified=True },
 { id=213969309, username=LewisHamilton, location=All around the World!!, followers=6549953, following=805, tweets=1114, verified=True },
 { id=214413743, username=danielricciardo, location=Monaco/Australia , followers=2361654, following=193, tweets=2497, verified=True },
 { id=516464106, username=LandoNorris, location=United Kingdom, followers=1321003, f

In [10]:
# Fetch the followings of every primary user, accumulating the users seen
# (nodes) and the follow links (edges) in sets so duplicates are discarded.
all_users = set(primary_users)
all_relations = set()
for prim_usr in primary_users:
    print('Procesando a: ' + prim_usr.username)
    followings_json = ext.get_following_by_user_id(prim_usr.id)
    followings = FullUser.from_json(followings_json)
    print('Followings: ' + str(len(followings)))

    # Each followed account becomes a node; each follow becomes an edge
    # from the primary user's id to the followed account's id.
    all_users.update(followings)
    all_relations.update(Relation(prim_usr.id, followed.id) for followed in followings)

    print('Usuarios Totales: ' + str(len(all_users)))
    print('Relaciones Totales: ' + str(len(all_relations)))

Procesando a: Charles_Leclerc
URL:https://api.twitter.com/2/users/262230432/following?user.fields=public_metrics,username,verified,location&max_results=1000
response code: 200
Followings: 167
Usuarios Totales: 172
Relaciones Totales: 167
Procesando a: PierreGASLY
URL:https://api.twitter.com/2/users/537951506/following?user.fields=public_metrics,username,verified,location&max_results=1000
response code: 200
Followings: 506
Usuarios Totales: 614
Relaciones Totales: 673
Procesando a: alo_oficial
URL:https://api.twitter.com/2/users/507672047/following?user.fields=public_metrics,username,verified,location&max_results=1000
response code: 200
Followings: 133
Usuarios Totales: 709
Relaciones Totales: 806
Procesando a: Carlossainz55
URL:https://api.twitter.com/2/users/353786894/following?user.fields=public_metrics,username,verified,location&max_results=1000
response code: 200
Followings: 338
Usuarios Totales: 920
Relaciones Totales: 1144
Procesando a: GeorgeRussell63
URL:https://api.twitter.com

## Exportamos la info a un fichero CSV

In [11]:
import datetime
import os

# Timestamp used to give each export a distinct file name; the [:-3] slice
# trims the 6-digit microseconds produced by %f down to milliseconds.
timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H_%M_%S_%f")[:-3]
data_folder = 'data'
# Create the output folder up front: opening a file inside a missing
# directory raises FileNotFoundError.
os.makedirs(data_folder, exist_ok=True)
edge_file = os.path.join(data_folder, 'edge_{}.csv'.format(timestamp))
node_file = os.path.join(data_folder, 'node_{}.csv'.format(timestamp))

In [12]:
def save_edges_to_csv(out_file, relation_collection):
    """Append relations to ``out_file`` as CSV rows ``Source,Target,Weight``.

    The file is opened in append mode, so calling this again on the same
    path adds more rows. The header is written only when the file is still
    empty, which keeps a repeated call from inserting a second header in
    the middle of the data.

    Args:
        out_file: path of the CSV file; its folder must already exist.
        relation_collection: iterable of objects exposing ``src`` and ``dst``.
    """
    with open(out_file, 'a+', newline='', encoding='utf-8') as csvFile:
        csvWriter = csv.writer(csvFile)
        # seek(0, 2) moves to the end of the file and returns its size.
        if csvFile.seek(0, 2) == 0:
            csvWriter.writerow(['Source','Target','Weight'])
        # Every edge gets a constant weight of 1.
        csvWriter.writerows([rel.src, rel.dst, 1] for rel in relation_collection)

In [13]:
def save_nodes_to_csv(out_file, user_collection):
    """Append users to ``out_file`` as CSV node rows.

    The file is opened in append mode, so calling this again on the same
    path adds more rows. The header is written only when the file is still
    empty, which keeps a repeated call from inserting a second header in
    the middle of the data. The file is written as UTF-8 so free-text
    fields such as ``location`` do not depend on the platform's default
    encoding.

    Args:
        out_file: path of the CSV file; its folder must already exist.
        user_collection: iterable of objects exposing ``id``, ``username``,
            ``location``, ``followers_count``, ``following_count``,
            ``tweet_count`` and ``verified``.
    """
    with open(out_file, 'a+', newline='', encoding='utf-8') as csvFile:
        csvWriter = csv.writer(csvFile)
        # seek(0, 2) moves to the end of the file and returns its size.
        if csvFile.seek(0, 2) == 0:
            csvWriter.writerow(['id','label','location', 'num_followers', 'num_followings', 'num_tweets', 'is_verified'])
        for user in user_collection:
            csvWriter.writerow([user.id, user.username, user.location, user.followers_count,
                                user.following_count, user.tweet_count, user.verified])

In [14]:
# Write every collected follow relation to the edge file.
save_edges_to_csv(out_file=edge_file, relation_collection=all_relations)

In [15]:
# Write every collected user to the node file.
save_nodes_to_csv(out_file=node_file, user_collection=all_users)