# Marvel Charts Sentiment Analysis

## Table of Contents
* [Packages](#1)
* [Ingestion Functions](#2)
* [Environment Variables](#3)

<a name="1"></a>
## Packages (Pacotes)

In [132]:
import os
from requests import get
from requests.utils import default_headers
from hashlib import md5
from time import time
from dotenv import load_dotenv
load_dotenv()

import pandas as pd

<a name="2"></a>
## Ingestion Utility Functions (Funções Utilitárias de Ingestão)

In [135]:
def get_characters(TS, PUBLIC_KEY, PRIVATE_KEY, limit=None, offset=None):
    """Fetch one page of Marvel characters and return their names and ids.

    The request is signed with ``md5(TS + PRIVATE_KEY + PUBLIC_KEY)``, sent
    as the ``hash`` query parameter next to ``ts`` and ``apikey``.

    Args:
        TS: Timestamp string used in the request signature.
        PUBLIC_KEY: Public API key (sent as ``apikey``).
        PRIVATE_KEY: Private API key (only used to build the hash).
        limit: Value for the ``limit`` query parameter, or None.
        offset: Value for the ``offset`` query parameter, or None.

    Returns:
        A ``(names, ids)`` tuple of two parallel lists when the response
        status is 200. For any other status a message is printed and None
        is returned.
    """
    signature = md5(
        (TS + PRIVATE_KEY + PUBLIC_KEY).encode('utf-8')
    ).hexdigest()

    query = {
        'name': None,
        'ts': TS,
        'apikey': PUBLIC_KEY,
        'hash': signature,
        'limit': limit,
        'offset': offset,
    }

    reply = get(
        'http://gateway.marvel.com' + '/v1/public/characters',
        params=query,
    )

    # Guard clause: every non-200 status is reported with the same message
    # ("Unauthorized access!") and yields None.
    if reply.status_code != 200:
        print('Acesso não autorizado!')
        return None

    records = reply.json()['data']['results']
    names = [record['name'] for record in records]
    ids = [record['id'] for record in records]
    return names, ids

In [149]:
class MarvelIngestion():
    """Callable that pages through a Marvel API collection.

    Calling an instance downloads consecutive pages of an endpoint
    (for example ``'comics'`` or ``'characters'``) and returns the items
    that carry a non-trivial description as a pandas DataFrame.
    """
    def __init__(
        self,
        public_key,
        private_key,
        url='http://gateway.marvel.com/v1/public/',
        limit=100,
    ):
        """Store the credentials and paging configuration.

        Args:
            public_key: Public API key, sent as ``apikey``.
            private_key: Private API key, only used to sign requests.
            url: Base URL that the endpoint name is appended to.
                NOTE(review): the default is plain HTTP; consider HTTPS if
                the gateway accepts it -- confirm.
            limit: Page size sent as the ``limit`` query parameter and used
                as the paging step. NOTE(review): the Marvel API reportedly
                rejects values above 100 -- confirm before raising it.
        """
        super().__init__()

        self.public_key = public_key
        self.private_key = private_key

        self.url = url
        # Honor the caller's page size (it used to be hard-coded to 100,
        # silently ignoring the ``limit`` argument).
        self.limit = limit

    def get_params(self, offset, format_=None):
        """Build the signed query parameters for one page request.

        A fresh timestamp is taken on every call and the signature is
        ``md5(ts + private_key + public_key)``.

        Args:
            offset: Index of the first record of the requested page.
            format_: Optional value for the ``format`` query parameter.

        Returns:
            dict with the keys ``ts``, ``apikey``, ``hash``, ``limit``,
            ``offset`` and ``format``.
        """
        ts = str(time())
        digest = md5(
            (ts + self.private_key + self.public_key).encode('utf-8')
        ).hexdigest()

        return {
            'ts': ts,
            'apikey': self.public_key,
            'hash': digest,
            'limit': self.limit,
            'offset': offset,
            'format': format_,
        }

    def __call__(self, endpoint, format_=None, offset=0, max_records=4600):
        """Download a window of records and return the described ones.

        Pages are requested at ``offset``, ``offset + limit``, ... until
        either ``max_records`` records have been scanned or the total
        reported by the API is reached. With the defaults this requests
        offsets 0..4500, exactly like the former hard-coded bound did.

        The window is relative to ``offset``. The previous absolute bound
        (4501) made any ``offset`` above 4500 skip the loop and then fail
        on an unbound loop variable.

        Args:
            endpoint: Collection name; it is lower-cased and stripped.
                ``'characters'`` items are labelled by their ``name``,
                every other endpoint by its ``title``.
            format_: Optional value for the ``format`` query parameter.
            offset: Index of the first record to scan.
            max_records: Maximum number of records to scan from ``offset``.

        Returns:
            pandas.DataFrame with the columns ``id``, ``title`` and
            ``description``; one row per distinct item whose description
            is longer than four characters.
        """
        endpoint = endpoint.lower().strip()
        key = 'name' if endpoint == 'characters' else 'title'
        headers = default_headers()

        examples = []
        seen = set()  # O(1) duplicate detection instead of scanning the list
        last_page = None

        for page in range(offset, offset + max_records, self.limit):
            print(page, end='\r')
            params = self.get_params(offset=page, format_=format_)
            data = get(
                self.url + endpoint, params=params, headers=headers
            ).json()['data']

            for item in data['results']:
                description = item['description']
                # Skip missing or near-empty descriptions.
                if not description or len(description) <= 4:
                    continue
                example = (item['id'], item[key], description)
                if example not in seen:
                    seen.add(example)
                    examples.append(list(example))

            last_page = page
            # Stop once the collection is exhausted rather than requesting
            # pages that can only come back empty.
            if page + self.limit >= data['total']:
                break

        # Finish the progress line; nothing to report if no page was read.
        if last_page is not None:
            print(last_page)

        features = ['id', 'title', 'description']
        df = pd.DataFrame(examples, columns=features)

        return df

<a name="3"></a>
## Environment Variables (Variáveis de Ambiente)

In [152]:
# Marvel API credentials, read from the process environment; a missing
# variable raises KeyError. os.environ values are already str, so no
# conversion is needed.
PUBLIC_KEY = os.environ['MARVEL_PUBLIC_KEY']
PRIVATE_KEY = os.environ['MARVEL_PRIVATE_KEY']

In [None]:
# Ingest comics (format 'comic') starting at the default offset of 0.
ingestion = MarvelIngestion(public_key=PUBLIC_KEY, private_key=PRIVATE_KEY)
df = ingestion('comics', format_='comic')
print(df.shape)
# Last expression of the cell, so the notebook renders the preview.
df.head()

100

In [None]:
# Ingest comics (format 'comic') starting at offset 4600.
ingestion = MarvelIngestion(public_key=PUBLIC_KEY, private_key=PRIVATE_KEY)
df_9000 = ingestion('comics', format_='comic', offset=4600)
print(df_9000.shape)
# Last expression of the cell, so the notebook renders the preview.
df_9000.head()

In [None]:
# Ingest comics (format 'comic') starting at offset 9200.
ingestion = MarvelIngestion(public_key=PUBLIC_KEY, private_key=PRIVATE_KEY)
df_13500 = ingestion('comics', format_='comic', offset=9200)
print(df_13500.shape)
# Last expression of the cell, so the notebook renders the preview.
df_13500.head()

# Tests (Testes)

In [455]:
# Scratch cell: issue a single signed request for the first 100 comics.
# NOTE(review): TS is not assigned in any visible cell -- presumably it was
# defined in a cell that has since been removed; confirm before re-running.
hash_ = md5(
    (TS + PRIVATE_KEY + PUBLIC_KEY).encode('utf-8')
).hexdigest()
params = dict(
    ts=TS,
    apikey=PUBLIC_KEY,
    hash=hash_,
    limit=100,
    format='comic',
)
url = 'http://gateway.marvel.com/v1/public/'

response = get(
    url + 'comics', params=params, headers=default_headers()
).json()