<a href="https://colab.research.google.com/github/crossmodais/app/blob/main/io.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import io
import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from google.colab import drive

def mount_google_drive():
    """Mount Google Drive at ``/content/drive`` via ``google.colab.drive``."""
    mount_point = '/content/drive'
    drive.mount(mount_point)

def get_drive_path(relative_path):
    """Return *relative_path* joined onto ``/content/drive/MyDrive``."""
    my_drive_root = '/content/drive/MyDrive'
    full_path = os.path.join(my_drive_root, relative_path)
    return full_path

def ensure_directory_exists_on_drive(relative_path):
    """Create the Drive folder for *relative_path* if needed and return its path.

    Args:
        relative_path: Folder path, resolved through ``get_drive_path``.

    Returns:
        The full path of the (now existing) folder.

    Raises:
        FileExistsError: If the path already exists but is not a directory.
    """
    drive_path = get_drive_path(relative_path)
    # exist_ok=True already makes this a no-op for an existing directory, so a
    # separate os.path.exists() check is redundant and only opens a
    # check-then-create race.
    os.makedirs(drive_path, exist_ok=True)
    return drive_path

def save_fig(fig, filename, drive_folder_path):
    """Save *fig* to ``drive_folder_path/filename`` at 300 dpi, then close it.

    Saving is best effort: a failure is logged (with traceback) instead of
    being raised, so one bad plot does not abort the caller. The figure is
    closed whether or not the save succeeded.

    Args:
        fig: Figure object exposing ``savefig``.
        filename: Name of the output file.
        drive_folder_path: Folder the file is written into.
    """
    try:
        filepath = os.path.join(drive_folder_path, filename)
        fig.savefig(filepath, dpi=300, bbox_inches='tight')
    except Exception:
        # Previously swallowed silently, which made missing output files
        # impossible to diagnose.
        logging.getLogger(__name__).exception(
            "Failed to save figure %r into %r", filename, drive_folder_path
        )
    finally:
        plt.close(fig)

def create_figure():
    """Return a new pyplot figure created with ``figsize=(10, 6)``."""
    width, height = 10, 6
    return plt.figure(figsize=(width, height))

def generate_statistical_summary(df, filename="sumario.txt", drive_path="/content/drive/MyDrive"):
    """Write ``df.describe(include='all')`` as plain text to ``drive_path/filename``.

    Best effort: any failure is logged (with traceback) and the function
    returns normally instead of raising.

    Args:
        df: DataFrame to summarise.
        filename: Name of the text file to write.
        drive_path: Folder the file is written into.
    """
    try:
        summary = df.describe(include='all').to_string()
        filepath = os.path.join(drive_path, filename)
        # Explicit UTF-8 so accented text in the summary is written the same
        # way regardless of the platform's default encoding.
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(summary)
    except Exception:
        # Previously swallowed silently; keep it non-fatal but visible.
        logging.getLogger(__name__).exception(
            "Failed to write statistical summary %r into %r", filename, drive_path
        )

def generate_lineplot(df, x_col, y_col, title, filename, drive_folder_path, hue=None):
    """Draw a seaborn line plot of *y_col* over *x_col* and save it.

    When *hue* is given, only rows belonging to the first six entries of
    ``df[hue].value_counts()`` are plotted. The plot is skipped (with a logged
    warning) if any required column is missing. Best effort: failures are
    logged, never raised.

    Args:
        df: Source DataFrame.
        x_col: Column for the x axis.
        y_col: Column for the y axis.
        title: Plot title.
        filename: Output file name.
        drive_folder_path: Folder the image is written into.
        hue: Optional column used to colour the lines.
    """
    log = logging.getLogger(__name__)
    fig = None
    try:
        required = [x_col, y_col] + ([hue] if hue else [])
        missing = [name for name in required if name not in df.columns]
        if missing:
            log.warning("Skipping %s: missing column(s) %s", filename, missing)
            return
        fig = create_figure()
        # Draw on this figure's axes explicitly rather than relying on
        # pyplot's "current figure" global state.
        ax = fig.gca()
        data = df
        if hue:
            top_hues = df[hue].value_counts().index[:6]
            data = df[df[hue].isin(top_hues)]
        sns.lineplot(x=x_col, y=y_col, data=data, hue=hue, palette="viridis", ax=ax)
        ax.set_title(title)
        save_fig(fig, filename, drive_folder_path)
    except Exception:
        log.exception("Failed to generate line plot %s", filename)
        # save_fig closes the figure on its own; this covers failures that
        # happen before it is reached so the figure is not leaked.
        if fig is not None:
            plt.close(fig)

def generate_violinplot(df, x_col, y_col, title, filename, drive_folder_path, hue=None):
    """Draw a seaborn violin plot of *y_col* grouped by *x_col* and save it.

    When *hue* is given, only rows belonging to the first six entries of
    ``df[hue].value_counts()`` are plotted. The plot is skipped (with a logged
    warning) if any required column is missing. Best effort: failures are
    logged, never raised.

    Args:
        df: Source DataFrame.
        x_col: Column for the x axis (groups).
        y_col: Column whose distribution is drawn.
        title: Plot title.
        filename: Output file name.
        drive_folder_path: Folder the image is written into.
        hue: Optional column used to colour/split the violins.
    """
    log = logging.getLogger(__name__)
    fig = None
    try:
        required = [x_col, y_col] + ([hue] if hue else [])
        missing = [name for name in required if name not in df.columns]
        if missing:
            log.warning("Skipping %s: missing column(s) %s", filename, missing)
            return
        fig = create_figure()
        # Draw on this figure's axes explicitly rather than relying on
        # pyplot's "current figure" global state.
        ax = fig.gca()
        data = df
        if hue:
            top_hues = df[hue].value_counts().index[:6]
            data = df[df[hue].isin(top_hues)]
        sns.violinplot(x=x_col, y=y_col, data=data, hue=hue, palette="viridis", ax=ax)
        ax.set_title(title)
        save_fig(fig, filename, drive_folder_path)
    except Exception:
        log.exception("Failed to generate violin plot %s", filename)
        # save_fig closes the figure on its own; this covers failures that
        # happen before it is reached so the figure is not leaked.
        if fig is not None:
            plt.close(fig)

def generate_kdeplot(df, col, title, filename, drive_folder_path, hue=None):
    """Draw a seaborn KDE plot of *col* and save it.

    When *hue* is given, only rows belonging to the first six entries of
    ``df[hue].value_counts()`` are plotted. The plot is skipped (with a logged
    warning) if any required column is missing. Best effort: failures are
    logged, never raised.

    Args:
        df: Source DataFrame.
        col: Column whose density is drawn.
        title: Plot title.
        filename: Output file name.
        drive_folder_path: Folder the image is written into.
        hue: Optional column used to draw one curve per group.
    """
    log = logging.getLogger(__name__)
    fig = None
    try:
        required = [col] + ([hue] if hue else [])
        missing = [name for name in required if name not in df.columns]
        if missing:
            log.warning("Skipping %s: missing column(s) %s", filename, missing)
            return
        fig = create_figure()
        # Draw on this figure's axes explicitly rather than relying on
        # pyplot's "current figure" global state.
        ax = fig.gca()
        data = df
        if hue:
            top_hues = df[hue].value_counts().index[:6]
            data = df[df[hue].isin(top_hues)]
        sns.kdeplot(data=data, x=col, hue=hue, palette="viridis", ax=ax)
        ax.set_title(title)
        save_fig(fig, filename, drive_folder_path)
    except Exception:
        log.exception("Failed to generate KDE plot %s", filename)
        # save_fig closes the figure on its own; this covers failures that
        # happen before it is reached so the figure is not leaked.
        if fig is not None:
            plt.close(fig)

def generate_stacked_barplot(df, x_col, y_col, title, filename, drive_folder_path, hue=None):
    """Draw a stacked bar chart of summed *y_col* per *x_col* and save it.

    The data is pivoted with *x_col* as index, *hue* as columns and the sum of
    *y_col* as values (absent combinations filled with 0). When *hue* is
    given, only rows belonging to the first six entries of
    ``df[hue].value_counts()`` are used. The plot is skipped (with a logged
    warning) if any required column is missing. Best effort: failures are
    logged, never raised.

    Args:
        df: Source DataFrame.
        x_col: Column providing the bar positions.
        y_col: Column that is summed.
        title: Plot title.
        filename: Output file name.
        drive_folder_path: Folder the image is written into.
        hue: Optional column providing the stacked segments.
    """
    log = logging.getLogger(__name__)
    fig = None
    try:
        required = [x_col, y_col] + ([hue] if hue else [])
        missing = [name for name in required if name not in df.columns]
        if missing:
            log.warning("Skipping %s: missing column(s) %s", filename, missing)
            return
        fig = create_figure()
        # Draw on this figure's axes explicitly rather than relying on
        # pyplot's "current figure" global state.
        ax = fig.gca()
        data = df
        if hue:
            top_hues = df[hue].value_counts().index[:6]
            data = df[df[hue].isin(top_hues)]
        # The string "sum" is the spelling pandas recommends; passing np.sum
        # triggers a FutureWarning on recent pandas versions.
        pivot_table = pd.pivot_table(
            data, values=y_col, index=x_col, columns=hue, aggfunc="sum", fill_value=0
        )
        pivot_table.plot(kind='bar', stacked=True, colormap="viridis", ax=ax)
        ax.set_title(title)
        save_fig(fig, filename, drive_folder_path)
    except Exception:
        log.exception("Failed to generate stacked bar plot %s", filename)
        # save_fig closes the figure on its own; this covers failures that
        # happen before it is reached so the figure is not leaked.
        if fig is not None:
            plt.close(fig)

# Script entry point: mount Drive, download the spreadsheet as CSV, clean it,
# render the 15 numbered plots and write the statistical summary.
# Variables are intentionally kept at module level (not wrapped in a function)
# so that `df` and `graficos_drive_path` remain available to later cells.
if __name__ == "__main__":
    try:
        mount_google_drive()
        graficos_drive_path = ensure_directory_exists_on_drive('data')
        url = "https://docs.google.com/spreadsheets/d/1e8otjghJ3PGvUNnQZevu6bZ0ktjBfWWUXvkRm61_y_8/export?format=csv"
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        df = pd.read_csv(io.StringIO(response.text))

        # Drop columns that are entirely empty, then fill remaining gaps in
        # numeric columns with that column's median.
        df.dropna(axis=1, how='all', inplace=True)
        for col in df.select_dtypes(include=np.number):
            df[col] = df[col].fillna(df[col].median())

        # Integer category codes for the categorical columns, when present.
        if 'Regiao' in df:
            df['Regiao_encoded'] = pd.Categorical(df['Regiao']).codes
        else:
            logging.getLogger(__name__).warning(
                "Column 'Regiao' not found; plots using hue 'Regiao_encoded' will not be generated"
            )
        if 'Setor Mais Atacado' in df:
            df['Setor_encoded'] = pd.Categorical(df['Setor Mais Atacado']).codes

        # (plot function, positional column arguments, title, output file name),
        # listed in the order the plots are generated.
        plot_jobs = [
            (generate_lineplot, ("Ano", "Ataques_Ciberneticos_num"), "1. Evolução de Ataques Cibernéticos", "1_lineplot_ataques.png"),
            (generate_violinplot, ("Regiao", "Investimento_Ciberseguranca_per_capita_R_"), "2. Investimento por Região", "2_violin_investimento_regiao.png"),
            (generate_kdeplot, ("Impacto_Financeiro_Ataques_R_Milhoes",), "3. KDE do Impacto Financeiro", "3_kde_impacto_financeiro.png"),
            (generate_stacked_barplot, ("Ano", "Ataques_Ciberneticos_num"), "4. Ataques por Ano e Região", "4_stacked_barplot_ataques.png"),
            (generate_lineplot, ("Ano", "IDH"), "5. Evolução do IDH", "5_lineplot_idh.png"),
            (generate_violinplot, ("Regiao", "Indice_de_Gini"), "6. Índice de Gini por Região", "6_violin_gini_regiao.png"),
            (generate_kdeplot, ("Escolaridade_Superior__",), "7. KDE da Escolaridade Superior", "7_kde_escolaridade_superior.png"),
            (generate_stacked_barplot, ("Ano", "Investimento_Ciberseguranca_per_capita_R_"), "8. Investimento por Ano e Região", "8_stacked_barplot_investimento.png"),
            (generate_lineplot, ("Ano", "Impacto_Financeiro_Ataques_R_Milhoes"), "9. Evolução do Impacto Financeiro", "9_lineplot_impacto_financeiro.png"),
            (generate_violinplot, ("Regiao", "Ataques_Ciberneticos_num"), "10. Ataques por Região", "10_violin_ataques_regiao.png"),
            (generate_kdeplot, ("IDH",), "11. KDE do IDH", "11_kde_idh.png"),
            (generate_stacked_barplot, ("Ano", "Indice_de_Gini"), "12. Gini por Ano e Região", "12_stacked_barplot_gini.png"),
            (generate_lineplot, ("Ano", "Escolaridade_Superior__"), "13. Evolução da Escolaridade Superior", "13_lineplot_escolaridade_superior.png"),
            (generate_violinplot, ("Regiao", "Impacto_Financeiro_Ataques_R_Milhoes"), "14. Impacto Financeiro por Região", "14_violin_impacto_financeiro_regiao.png"),
            (generate_kdeplot, ("Investimento_Ciberseguranca_per_capita_R_",), "15. KDE do Investimento em Cibersegurança", "15_kde_investimento_ciberseguranca.png"),
        ]
        for plot_func, plot_columns, plot_title, plot_filename in plot_jobs:
            plot_func(df, *plot_columns, plot_title, plot_filename, graficos_drive_path, hue="Regiao_encoded")

        generate_statistical_summary(df, "sumario.txt", graficos_drive_path)
    except requests.exceptions.RequestException:
        # Still non-fatal, but no longer silent.
        logging.getLogger(__name__).exception("HTTP request for the spreadsheet CSV failed")
    except Exception:
        logging.getLogger(__name__).exception("Analysis run failed")

Mounted at /content/drive
