In [85]:
import base64
from datetime import datetime
from html import escape
from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns



In [86]:

class DataAnalysisReporter:
    """Build a self-contained HTML profiling report for a pandas DataFrame.

    Each ``generate_*`` step stores its result in ``self.report_data``:

    * ``'basic_info'``         -- row/column counts, memory usage, duplicates.
    * ``'column_info'``        -- per-column dtype, null and uniqueness figures,
      plus descriptive statistics for numeric columns.
    * ``'correlation_matrix'`` -- base64-encoded PNG heatmap; present only when
      the frame has at least two numeric columns.

    ``generate_html_report`` runs every step and returns the final HTML string.
    """

    def __init__(self, df):
        """Keep a reference to ``df`` and start with an empty result dict."""
        self.df = df
        self.report_data = {}

    def generate_basic_info(self):
        """Store the frame-level figures under ``report_data['basic_info']``."""
        self.report_data['basic_info'] = {
            'num_rows': len(self.df),
            'num_columns': len(self.df.columns),
            # Bytes -> MB. memory_usage() runs with its defaults (deep=False),
            # so the payload of object columns is not counted.
            'memory_usage': self.df.memory_usage().sum() / 1024**2,
            'duplicated_rows': self.df.duplicated().sum()
        }

    @staticmethod
    def _has_numeric_stats(col_data):
        """Return True when mean/std/min/max/quartiles should be computed.

        Booleans satisfy ``is_numeric_dtype`` but are excluded here so this
        step agrees with the ``np.number`` selection used for the correlation
        matrix, and because their min/max cannot be rendered with ``:.2f``.
        """
        return (
            pd.api.types.is_numeric_dtype(col_data)
            and not pd.api.types.is_bool_dtype(col_data)
        )

    def generate_column_info(self):
        """Store per-column details under ``report_data['column_info']``.

        Every column gets ``dtype``, ``null_count``, ``null_percentage`` and
        ``unique_values``. Numeric (non-boolean) columns additionally get
        ``mean``, ``std``, ``min``, ``max`` and ``quartiles`` (a dict keyed by
        0.25, 0.5 and 0.75).
        """
        column_info = {}

        for column in self.df.columns:
            col_data = self.df[column]
            total = len(col_data)
            null_count = col_data.isnull().sum()  # computed once, used twice

            info = {
                'dtype': str(col_data.dtype),
                'null_count': null_count,
                # An empty frame has no rows to divide by; report 0% instead
                # of a NaN produced by 0 / 0.
                'null_percentage': (null_count / total) * 100 if total else 0.0,
                'unique_values': col_data.nunique()
            }

            if self._has_numeric_stats(col_data):
                info.update({
                    'mean': col_data.mean(),
                    'std': col_data.std(),
                    'min': col_data.min(),
                    'max': col_data.max(),
                    'quartiles': col_data.quantile([0.25, 0.5, 0.75]).to_dict()
                })

            column_info[column] = info

        self.report_data['column_info'] = column_info

    def generate_correlation_matrix(self):
        """Render the numeric correlation heatmap as a base64 PNG.

        The image is stored under ``report_data['correlation_matrix']``. When
        the frame has fewer than two numeric columns nothing is stored, and
        any image left over from an earlier call is removed.
        """
        # Never let a heatmap from a previous run survive into a new report.
        self.report_data.pop('correlation_matrix', None)

        numeric_df = self.df.select_dtypes(include=[np.number])
        if numeric_df.shape[1] < 2:
            return

        corr_matrix = numeric_df.corr()

        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, ax=ax)
            ax.set_title('Matriz de Correlação')

            with BytesIO() as buffer:
                fig.savefig(buffer, format='png', bbox_inches='tight')
                image_png = buffer.getvalue()
        finally:
            # Close the figure even if drawing or saving fails.
            plt.close(fig)

        self.report_data['correlation_matrix'] = base64.b64encode(image_png).decode()

    def _generate_column_html(self):
        """Return the HTML tables describing each entry of ``column_info``.

        Column names and dtypes come from the data, so they are HTML-escaped
        before being placed in the markup.
        """
        parts = []
        for column, info in self.report_data['column_info'].items():
            parts.append(f"""
            <h3>Coluna: {escape(str(column))}</h3>
            <table>
                <tr><th>Métrica</th><th>Valor</th></tr>
                <tr><td>Tipo de Dados</td><td>{escape(info['dtype'])}</td></tr>
                <tr><td>Valores Nulos</td><td>{info['null_count']} ({info['null_percentage']:.2f}%)</td></tr>
                <tr><td>Valores Únicos</td><td>{info['unique_values']}</td></tr>
            """)

            # Statistics rows exist only for columns that received them.
            if 'mean' in info:
                parts.append(f"""
                <tr><td>Média</td><td>{info['mean']:.2f}</td></tr>
                <tr><td>Desvio Padrão</td><td>{info['std']:.2f}</td></tr>
                <tr><td>Mínimo</td><td>{info['min']:.2f}</td></tr>
                <tr><td>Máximo</td><td>{info['max']:.2f}</td></tr>
                <tr><td>1º Quartil</td><td>{info['quartiles'][0.25]:.2f}</td></tr>
                <tr><td>Mediana</td><td>{info['quartiles'][0.5]:.2f}</td></tr>
                <tr><td>3º Quartil</td><td>{info['quartiles'][0.75]:.2f}</td></tr>
                """)

            parts.append("</table>")
        return "".join(parts)

    def generate_html_report(self):
        """Run every analysis step and return the complete HTML document.

        Returns:
            str: the report markup. The correlation section embeds the heatmap
            when one was produced, and a short notice otherwise.
        """
        self.generate_basic_info()
        self.generate_column_info()
        self.generate_correlation_matrix()

        basic = self.report_data['basic_info']

        # The heatmap is absent when the frame has fewer than two numeric
        # columns; show a notice instead of failing on the missing key.
        encoded_image = self.report_data.get('correlation_matrix')
        if encoded_image:
            correlation_html = (
                f'<img src="data:image/png;base64,{encoded_image}" '
                'alt="Correlation Matrix">'
            )
        else:
            correlation_html = (
                '<p>Matriz de correlação indisponível: são necessárias '
                'pelo menos duas colunas numéricas.</p>'
            )

        report = f"""
        <html>
        <head>
            <title>Relatório de Análise de Dados</title>
            <style>
                body {{ font-family: Arial, sans-serif; margin: 20px; }}
                .section {{ margin: 20px 0; padding: 20px; border: 1px solid #ddd; border-radius: 5px; }}
                table {{ border-collapse: collapse; width: 100%; }}
                th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
                th {{ background-color: #f2f2f2; }}
            </style>
        </head>
        <body>
            <h1>Relatório de Análise de Dados</h1>
            <div class="section">
                <h2>Informações Básicas</h2>
                <table>
                    <tr><th>Métrica</th><th>Valor</th></tr>
                    <tr><td>Número de Linhas</td><td>{basic['num_rows']}</td></tr>
                    <tr><td>Número de Colunas</td><td>{basic['num_columns']}</td></tr>
                    <tr><td>Uso de Memória (MB)</td><td>{basic['memory_usage']:.2f}</td></tr>
                    <tr><td>Linhas Duplicadas</td><td>{basic['duplicated_rows']}</td></tr>
                </table>
            </div>
            
            <div class="section">
                <h2>Informações das Colunas</h2>
                {self._generate_column_html()}
            </div>
            
            <div class="section">
                <h2>Matriz de Correlação</h2>
                {correlation_html}
            </div>
        </body>
        </html>
        """

        return report

In [87]:
def generate_data_analysis_report(df, output_file='report.html'):
    """Build the HTML analysis report for ``df`` and save it to disk.

    Args:
        df: DataFrame handed to ``DataAnalysisReporter``.
        output_file: destination path of the HTML file; it is opened in
            write mode with UTF-8 encoding.

    Returns:
        str: a confirmation message that includes ``output_file``.
    """
    report_markup = DataAnalysisReporter(df).generate_html_report()

    with open(output_file, mode='w', encoding='utf-8') as report_file:
        report_file.write(report_markup)

    return "Relatório gerado com sucesso: {}".format(output_file)

In [88]:
# Load the dataset ('atlantic.csv' is resolved relative to the working directory)
df = pd.read_csv('atlantic.csv')

# Generate the report: writes 'atlantic_report.html' and prints the status
# message returned by generate_data_analysis_report
resultado = generate_data_analysis_report(df, 'atlantic_report.html')
print(resultado)

Relatório gerado com sucesso: atlantic_report.html
