# In [18]:
import pandas as pd
import os
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go

# In [20]:
class CommitSentimentAnalysis:
    def __init__(self, data_path: str):
        self.data_path = data_path
        self.commits_path = r'Commits Path'
        self.authors_path = r'Authors Path'

        self.authors = pd.read_csv(self.authors_path)

        self.data = pd.read_json(data_path, lines=True)
        self.normalize_data()
        self.uniform_data()

        self.commits = pd.read_csv(self.commits_path, parse_dates=["date"])
        self.combined_data = self.merge_sentiment_with_commits()

        self.template = 'plotly_white'

    def normalize_data(self):
        self.data = pd.json_normalize(self.data['response'].apply(eval))

    def uniform_data(self):
        self.data['sentiment'] = self.data['sentiment'].str.lower()

    def merge_sentiment_with_commits(self):
        merged = self.commits.copy()
        merged['sentiment'] = self.data['sentiment']

        id_to_name = dict(zip(self.authors['id'], self.authors['name']))
        merged['author_name'] = merged['author_id'].map(id_to_name)

        return merged

    def plot_sentiment_distribution(self):
        """Build a bar chart with the number of records per sentiment label.

        Returns:
            The Plotly figure; it is neither shown nor saved here.
        """
        counts_by_label = self.data['sentiment'].value_counts()

        bar_style = dict(
            marker_color='rgb(158,202,225)',
            marker_line_color='rgb(8,48,107)',
            marker_line_width=1.5,
            opacity=0.8,
        )
        layout = dict(
            title_x=0.5,
            title_font_size=24,
            xaxis_title="Sentimento",
            yaxis_title="Quantidade",
            font=dict(size=14),
            showlegend=False,
            plot_bgcolor='white',
            paper_bgcolor='white',
            margin=dict(t=100, l=70, r=40, b=70),
        )

        chart = px.bar(
            counts_by_label,
            x=counts_by_label.index,
            y=counts_by_label.values,
            title='Distribuição de Sentimentos',
            template=self.template,
        )
        chart.update_traces(**bar_style)
        chart.update_layout(**layout)
        return chart

    def plot_sentiment_trend(self, freq='W'):
        """Plot how many commits of each sentiment fall into every time bucket.

        Args:
            freq: Frequency handed to ``pd.Grouper`` to bucket the ``date``
                column (``'W'`` by default).

        Returns:
            The Plotly line figure, one line per sentiment.
        """
        palette = {
            'positive': '#4CAF50',
            'neutral': '#9E9E9E',
            'negative': '#F44336',
        }

        # Work on a derived frame indexed by date; combined_data stays intact.
        dated = self.combined_data.assign(
            date=pd.to_datetime(self.combined_data['date'])
        ).set_index('date')

        bucket_sizes = dated.groupby([pd.Grouper(freq=freq), 'sentiment']).size()
        counts_over_time = bucket_sizes.reset_index(name='count')

        chart = px.line(
            counts_over_time,
            x='date',
            y='count',
            color='sentiment',
            color_discrete_map=palette,
            title='Tendência de Sentimentos ao Longo do Tempo',
            template=self.template,
        )
        chart.update_traces(line=dict(width=4))

        layout = dict(
            title_x=0.5,
            title_font_size=24,
            xaxis_title="Data",
            yaxis_title="Número de Commits",
            font=dict(size=14),
            plot_bgcolor='white',
            paper_bgcolor='white',
            margin=dict(t=100, l=70, r=40, b=70),
        )
        chart.update_layout(**layout)
        return chart

    def plot_sentiment_vs_commit_size(self, metric='insertions'):
        """Build one violin plot of ``metric`` for each sentiment.

        Args:
            metric: Column of the combined data plotted on the y axis.

        Returns:
            A dict mapping ``'positive'``, ``'neutral'`` and ``'negative'``
            to their Plotly figure.
        """
        commits = self.combined_data.copy()

        # Insertion order of this dict is also the order of the result.
        palette = {
            'positive': '#4CAF50',
            'neutral': '#9E9E9E',
            'negative': '#F44336',
        }
        metric_label = metric.capitalize()

        charts = {}
        for label, colour in palette.items():
            only_this_label = commits[commits['sentiment'] == label]

            chart = px.violin(
                only_this_label,
                y=metric,
                box=True,
                color_discrete_sequence=[colour],
                title=f'{label.capitalize()} – Distribuição de {metric_label}',
                template=self.template,
            )
            chart.update_layout(
                title_x=0.5,
                font=dict(size=14),
                yaxis_title=metric_label,
                xaxis_visible=False,
                plot_bgcolor='white',
                paper_bgcolor='white',
                height=400,
                margin=dict(t=80, l=60, r=40, b=60),
            )
            charts[label] = chart

        return charts

    def plot_individual_sentiment_by_weekday(self):
        """Build, for each weekday, a line chart of commits per hour and sentiment.

        The ``date`` column of the combined data must already be a datetime
        column, because its ``.dt`` accessor is used.

        Returns:
            A dict mapping the English weekday names (Monday to Sunday) to
            their Plotly figure.
        """
        df = self.combined_data.copy()
        df['hour'] = df['date'].dt.hour
        df['weekday'] = df['date'].dt.day_name()

        weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
        df['weekday'] = pd.Categorical(df['weekday'], categories=weekday_order, ordered=True)

        # `weekday` is categorical, so the result depends on `observed`.
        # observed=False is what pandas applied implicitly before; passing it
        # explicitly keeps the output stable across pandas versions (the
        # implicit default is deprecated) and silences the FutureWarning.
        sentiment_time = (
            df.groupby(['weekday', 'hour', 'sentiment'], observed=False)
            .size()
            .reset_index(name='count')
        )

        color_map = {
            'positive': '#4CAF50',
            'neutral': '#9E9E9E',
            'negative': '#F44336'
        }

        figures = {}

        for day in weekday_order:
            daily_df = sentiment_time[sentiment_time['weekday'] == day]

            fig = px.line(
                daily_df,
                x='hour',
                y='count',
                color='sentiment',
                color_discrete_map=color_map,
                title=f'{day} - Padrão de Sentimentos por Hora',
                template=self.template
            )

            fig.update_traces(line=dict(width=5), mode='lines+markers')

            fig.update_layout(
                title_x=0.5,
                font=dict(size=14),
                xaxis_title='Hora do Dia',
                yaxis_title='Número de Commits',
                legend_title_text='Sentimento',
                plot_bgcolor='white',
                paper_bgcolor='white',
                margin=dict(t=80, l=60, r=40, b=60),
                height=400
            )

            fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor='lightgray')

            figures[day] = fig

        return figures

    def plot_sentiment_by_author(self, top_n=10):
        """Build a grouped bar chart of sentiment counts for the busiest authors.

        Args:
            top_n: Number of authors kept, ranked by their total commit count.

        Returns:
            The Plotly figure with one bar group per author and one bar per
            sentiment that occurs among those authors.
        """
        df = self.combined_data.copy()
        sentiment_author = df.groupby(['author_name', 'sentiment']).size().reset_index(name='count')

        top_authors = (
            sentiment_author.groupby('author_name')['count']
            .sum()
            .nlargest(top_n)
            .index
        )

        sentiment_author = sentiment_author[sentiment_author['author_name'].isin(top_authors)]

        pivot_df = sentiment_author.pivot(index='author_name', columns='sentiment', values='count').fillna(0)
        # Restore the ranking order, which the pivot replaced by alphabetical order.
        pivot_df = pivot_df.reindex(top_authors)

        sentiment_colors = {
            'positive': '#2ECC71',
            'neutral': '#95A5A6',
            'negative': '#E74C3C'
        }

        fig = go.Figure()

        for sentiment, color in sentiment_colors.items():
            # A sentiment that none of the selected authors has gets no trace.
            if sentiment in pivot_df.columns:
                fig.add_trace(
                    go.Bar(
                        name=sentiment.capitalize(),
                        x=pivot_df.index,
                        y=pivot_df[sentiment],
                        marker_color=color,
                        hovertemplate=(
                            "<b>%{x}</b><br>" +
                            "Sentimento: " + sentiment.capitalize() + "<br>" +
                            "Quantidade: %{y}<br>" +
                            "<extra></extra>"
                        )
                    )
                )

        fig.update_layout(
            title={
                'text': 'Análise de Sentimento por Autor',
                'y': 0.95,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top',
                'font': {'size': 24}
            },
            font=dict(family="Arial", size=14),
            xaxis_title="Autor",
            yaxis_title="Quantidade de Commits",
            barmode='group',
            # Use the instance-wide template, like the other plotting methods,
            # instead of a second hard-coded copy of its name.
            template=self.template,
            height=600,
            bargap=0.15,
            bargroupgap=0.05,
            margin=dict(l=60, r=60, t=100, b=60),
            legend=dict(
                yanchor="top",
                y=0.99,
                xanchor="right",
                x=0.99,
                bgcolor='rgba(255, 255, 255, 0.8)',
                bordercolor='rgba(0, 0, 0, 0.2)',
                borderwidth=1
            ),
            plot_bgcolor='white'
        )

        fig.update_yaxes(
            showgrid=True,
            gridwidth=1,
            gridcolor='rgba(0, 0, 0, 0.1)'
        )

        return fig
    
    
    def plot_word_frequency_by_sentiment(self, top_n=10):
        """Build a grouped bar chart of the most frequent words per sentiment.

        Words are the ``\\w+`` runs of the lower-cased commit messages.

        Args:
            top_n: Number of most frequent words kept for each sentiment.

        Returns:
            The Plotly figure with one bar trace per sentiment that has at
            least one word.
        """
        from collections import Counter
        import re

        palette = {
            'positive': '#4CAF50',
            'neutral': '#9E9E9E',
            'negative': '#F44336'
        }
        # Used for labels outside the palette instead of raising KeyError.
        fallback_color = '#607D8B'

        df = self.combined_data.copy()
        word_pattern = re.compile(r'\w+')

        def get_word_freq(messages):
            # Missing messages are dropped; str.join would fail on NaN.
            text = ' '.join(messages.dropna().astype(str)).lower()
            return Counter(word_pattern.findall(text)).most_common(top_n)

        sentiment_words = {}
        # Commits without a sentiment (NaN) cannot be matched with `==`.
        for sentiment in df['sentiment'].dropna().unique():
            top_words = get_word_freq(df.loc[df['sentiment'] == sentiment, 'message'])
            # An empty list cannot be unzipped into words/counts below.
            if top_words:
                sentiment_words[sentiment] = top_words

        fig = go.Figure()

        for sentiment, top_words in sentiment_words.items():
            words, counts = zip(*top_words)
            fig.add_trace(go.Bar(
                name=sentiment,
                x=words,
                y=counts,
                marker_color=palette.get(sentiment, fallback_color)
            ))

        fig.update_layout(
            barmode='group',
            title='Palavras Mais Frequentes por Sentimento',
            xaxis_title='Palavras',
            yaxis_title='Frequência'
        )

        return fig
    
    def plot_sentiment_by_commit_type(self):
        """Build a sunburst chart of sentiments nested under commit types.

        The commit type is derived from keywords contained in the lower-cased
        commit message.

        Returns:
            The Plotly sunburst figure.
        """
        # Ordered rules: the first one with a keyword found in the message wins.
        type_rules = (
            (('fix', 'bug'), 'Correção'),
            (('feat', 'feature'), 'Nova Funcionalidade'),
            (('refactor',), 'Refatoração'),
            (('docs',), 'Documentação'),
        )

        def classify(message):
            lowered = message.lower()
            for keywords, label in type_rules:
                if any(keyword in lowered for keyword in keywords):
                    return label
            return 'Outros'

        commits = self.combined_data.copy()
        commits['commit_type'] = commits['message'].apply(classify)

        palette = {
            'positive': '#4CAF50',
            'neutral': '#9E9E9E',
            'negative': '#F44336',
        }

        return px.sunburst(
            commits,
            path=['commit_type', 'sentiment'],
            title='Distribuição de Sentimentos por Tipo de Commit',
            color='sentiment',
            color_discrete_map=palette,
        )

    def plot_commit_complexity_vs_sentiment(self):
        """Scatter commit size against message length, coloured by sentiment.

        Commit size ("complexity") is ``insertions + deletions``. The x axis
        is cut at the 95th percentile of that size.

        Returns:
            The Plotly scatter figure.
        """
        commits = self.combined_data.assign(
            complexity=lambda frame: frame['insertions'] + frame['deletions'],
            message_length=lambda frame: frame['message'].str.len(),
        )
        upper_x = commits['complexity'].quantile(0.95)

        palette = {
            'positive': '#4CAF50',
            'neutral': '#9E9E9E',
            'negative': '#F44336',
        }
        axis_labels = {
            'complexity': 'Complexidade (Inserções + Deleções)',
            'message_length': 'Tamanho da Mensagem',
        }
        grid_style = dict(showgrid=True, gridwidth=1, gridcolor='rgba(0, 0, 0, 0.1)')

        chart = px.scatter(
            commits,
            x='complexity',
            y='message_length',
            color='sentiment',
            title='Complexidade do Commit vs Tamanho da Mensagem por Sentimento',
            color_discrete_map=palette,
            labels=axis_labels,
        )

        chart.update_layout(
            template=self.template,
            title_x=0.5,
            title_font_size=24,
            font=dict(size=14),
            plot_bgcolor='white',
            paper_bgcolor='white',
            margin=dict(t=100, l=70, r=40, b=70),
        )

        # Axis range and grid styling are independent properties, set together.
        chart.update_xaxes(range=[0, upper_x], **grid_style)
        chart.update_yaxes(**grid_style)

        return chart
    
    def plot_sentiment_radar_metrics(self):
        """Build a radar chart of the mean normalised commit metrics per sentiment.

        Each metric present in the combined data is divided by its maximum
        and then averaged per sentiment.

        Returns:
            The Plotly figure with one polar trace per sentiment.
        """
        def translucent(hex_color, alpha=0.2):
            # '#RRGGBB' -> 'rgba(r, g, b, alpha)'. The previous string
            # replacement only worked on 'rgb(...)' values, so the hex colours
            # used here were never made translucent.
            red, green, blue = (int(hex_color[i:i + 2], 16) for i in (1, 3, 5))
            return f'rgba({red}, {green}, {blue}, {alpha})'

        df = self.combined_data.copy()

        metrics_display = {
            'insertions': 'Inserções',
            'deletions': 'Deleções',
            'files_changed': 'Arquivos Modificados',
            'dmm_unit_size': 'Tamanho da Unidade',
            'dmm_unit_complexity': 'Complexidade',
            'dmm_unit_interfacing': 'Interfaceamento'
        }

        # Only the metrics that exist as columns are plotted.
        available = [metric for metric in metrics_display if metric in df.columns]

        for metric in available:
            scale = df[metric].max()
            # An all-zero metric would otherwise yield 0/0 = NaN.
            if pd.isna(scale) or scale == 0:
                scale = 1
            df[f'{metric}_norm'] = df[metric] / scale

        normalized_metrics = [f'{metric}_norm' for metric in available]

        radar_data = df.groupby('sentiment')[normalized_metrics].mean()

        # 'neutral' was missing, which raised KeyError as soon as a neutral
        # commit was present.
        colors = {
            'positive': '#2ECC71',
            'neutral': '#95A5A6',
            'negative': '#E74C3C'
        }
        # Used for any label outside the three above.
        fallback_color = '#34495E'

        fig = go.Figure()

        for sentiment in radar_data.index:
            color = colors.get(sentiment, fallback_color)
            fig.add_trace(go.Scatterpolar(
                r=radar_data.loc[sentiment],
                theta=[metrics_display[metric] for metric in available],
                name=sentiment.capitalize(),
                line=dict(color=color, width=3),
                fill='toself',
                fillcolor=translucent(color)
            ))

        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 1],
                    tickfont=dict(size=12),
                    gridcolor='rgba(0, 0, 0, 0.1)'
                ),
                angularaxis=dict(
                    tickfont=dict(size=12, family='Arial'),
                    gridcolor='rgba(0, 0, 0, 0.1)'
                ),
                bgcolor='white'
            ),
            showlegend=True,
            legend=dict(
                x=1.2,
                y=0.5,
                font=dict(size=12, family='Arial'),
                bgcolor='rgba(255, 255, 255, 0.8)',
                bordercolor='rgba(0, 0, 0, 0.2)',
                borderwidth=1
            ),
            title=dict(
                text='Perfil de Métricas por Sentimento',
                x=0.5,
                y=0.95,
                font=dict(size=24, family='Arial', color='#2C3E50')
            ),
            paper_bgcolor='white',
            plot_bgcolor='white',
            margin=dict(t=100, b=100, l=100, r=150)
        )

        return fig
    
    def plot_sentiment_radar_temporal(self):
        """Build a radar chart of the sentiment share in each period of the day.

        Commits are bucketed by the hour of their ``date`` into four 6-hour
        periods, and the share of every sentiment is computed within each
        period.

        Returns:
            The Plotly figure with one polar trace per sentiment.
        """
        def translucent(hex_color, alpha=0.2):
            # '#RRGGBB' -> 'rgba(r, g, b, alpha)'. The previous string
            # replacement only worked on 'rgb(...)' values, so the hex colours
            # used here were never made translucent.
            red, green, blue = (int(hex_color[i:i + 2], 16) for i in (1, 3, 5))
            return f'rgba({red}, {green}, {blue}, {alpha})'

        df = self.combined_data.copy()

        df['hour'] = df['date'].dt.hour
        period_labels = ['Madrugada\n(00h-06h)', 
                        'Manhã\n(06h-12h)', 
                        'Tarde\n(12h-18h)', 
                        'Noite\n(18h-24h)']

        # right=False gives the half-open bins [0, 6), [6, 12), [12, 18) and
        # [18, 24) that the labels describe. With the default right-closed
        # bins hour 0 fell outside every bin (its commits were dropped) and
        # hours 6, 12 and 18 landed in the preceding period.
        df['period'] = pd.cut(df['hour'],
                            bins=[0, 6, 12, 18, 24],
                            labels=period_labels,
                            right=False)

        # normalize='index' makes every period (row) sum to 1.
        period_sentiment = pd.crosstab(df['period'],
                                    df['sentiment'],
                                    normalize='index')

        colors = {
            'positive': '#2ECC71',
            'neutral': '#95A5A6',
            'negative': '#E74C3C'
        }
        # Used for any label outside the three above instead of raising KeyError.
        fallback_color = '#34495E'

        fig = go.Figure()

        for sentiment in period_sentiment.columns:
            color = colors.get(sentiment, fallback_color)
            fig.add_trace(go.Scatterpolar(
                r=period_sentiment[sentiment],
                theta=period_sentiment.index,
                name=sentiment.capitalize(),
                line=dict(color=color, width=3),
                fill='toself',
                fillcolor=translucent(color)
            ))

        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    # Leave 20% headroom above the largest share.
                    range=[0, period_sentiment.values.max() * 1.2],
                    tickformat=',.0%',
                    tickfont=dict(size=12),
                    gridcolor='rgba(0, 0, 0, 0.1)'
                ),
                angularaxis=dict(
                    tickfont=dict(size=12, family='Arial'),
                    gridcolor='rgba(0, 0, 0, 0.1)'
                ),
                bgcolor='white'
            ),
            showlegend=True,
            legend=dict(
                x=1.2,
                y=0.5,
                font=dict(size=12, family='Arial'),
                bgcolor='rgba(255, 255, 255, 0.8)',
                bordercolor='rgba(0, 0, 0, 0.2)',
                borderwidth=1
            ),
            title=dict(
                text='Distribuição de Sentimentos ao Longo do Dia',
                x=0.5,
                y=0.95,
                font=dict(size=24, family='Arial', color='#2C3E50')
            ),
            paper_bgcolor='white',
            plot_bgcolor='white',
            margin=dict(t=100, b=100, l=100, r=150)
        )

        return fig
    
    def plot_author_radar_metrics(self, top_n=5):
        """Build a radar chart of the mean normalised commit metrics per author.

        Only the ``top_n`` authors with the most commits are kept. Each metric
        present in the combined data is divided by its maximum among those
        authors and then averaged per author.

        Args:
            top_n: Number of authors included, ranked by commit count.

        Returns:
            The Plotly figure with one polar trace per author.
        """
        df = self.combined_data.copy()

        metrics_display = {
            'insertions': 'Inserções',
            'deletions': 'Deleções',
            'files_changed': 'Arquivos Modificados',
            'dmm_unit_size': 'Tamanho da Unidade',
            'dmm_unit_complexity': 'Complexidade',
            'dmm_unit_interfacing': 'Interfaceamento'
        }

        top_authors = df['author_name'].value_counts().nlargest(top_n).index
        df_top = df[df['author_name'].isin(top_authors)].copy()

        # Only the metrics that exist as columns are plotted.
        available = [metric for metric in metrics_display if metric in df_top.columns]

        for metric in available:
            scale = df_top[metric].max()
            # An all-zero metric would otherwise yield 0/0 = NaN.
            if pd.isna(scale) or scale == 0:
                scale = 1
            df_top[f'{metric}_norm'] = df_top[metric] / scale

        normalized_metrics = [f'{metric}_norm' for metric in available]

        radar_data = df_top.groupby('author_name')[normalized_metrics].mean()

        # The palette is finite; cycling through it avoids an IndexError when
        # more authors are requested than it has colours.
        palette = px.colors.qualitative.Set3

        fig = go.Figure()

        for idx, author in enumerate(radar_data.index):
            fig.add_trace(go.Scatterpolar(
                r=radar_data.loc[author].values,
                theta=[metrics_display[metric] for metric in available],
                name=author,
                line=dict(color=palette[idx % len(palette)], width=3),
                fill='toself',
                opacity=0.7
            ))

        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 1],
                    tickfont=dict(size=12),
                    gridcolor='rgba(0, 0, 0, 0.1)'
                ),
                angularaxis=dict(
                    tickfont=dict(size=12, family='Arial'),
                    gridcolor='rgba(0, 0, 0, 0.1)'
                ),
                bgcolor='white'
            ),
            showlegend=True,
            legend=dict(
                x=1.2,
                y=0.5,
                font=dict(size=12, family='Arial'),
                bgcolor='rgba(255, 255, 255, 0.8)',
                bordercolor='rgba(0, 0, 0, 0.2)',
                borderwidth=1
            ),
            title=dict(
                text=f'Perfil de Desenvolvimento por Autor (Top {top_n})',
                x=0.5,
                y=0.95,
                font=dict(size=24, family='Arial', color='#2C3E50')
            ),
            paper_bgcolor='white',
            plot_bgcolor='white',
            margin=dict(t=100, b=100, l=100, r=150)
        )

        return fig