In [1]:
##%%
#!pip install pandas
#!pip install requests
#!pip install gql
#!pip install python_dotenv
#!pip install requests-toolbelt
#!pip install matplotlib

import pandas as pd
from dotenv import load_dotenv
import os
from gql.transport.requests import RequestsHTTPTransport
from gql import Client, gql
from matplotlib import pyplot as plt

##%%
# Load the GitHub token from the .env file
load_dotenv("../.env")
token = os.getenv('GH_TOKEN')
# Reject an empty value as well as a missing one: a `GH_TOKEN=` line yields ''
# which would otherwise be sent as an empty credential in the Authorization header.
if not token:
    raise ValueError("GitHub token is not set. Check your .env file.")

# Initialize the GraphQL client
def initialize_client():
    """Build a gql ``Client`` pointed at the GitHub GraphQL endpoint.

    The request is authenticated with the module-level ``token`` and the
    client is configured to fetch the schema from the transport.
    """
    auth_headers = {'Authorization': f'token {token}'}
    github_transport = RequestsHTTPTransport(
        url='https://api.github.com/graphql',
        headers=auth_headers,
    )
    github_client = Client(
        transport=github_transport,
        fetch_schema_from_transport=True,
    )
    return github_client

# Shared client instance: read as a global by execute_query and passed
# explicitly to the extract_* helpers below.
client = initialize_client()

##%%
# Load GraphQL query from file
def load_query(file_path):
    """Return the full text of the GraphQL query stored at ``file_path``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default encoding.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

# Paths to the GraphQL query files (relative paths, resolved against the
# current working directory)
comments_query_path = '../queries/bitcoin-core-comments.graphql'
reactions_query_path = '../queries/bitcoin-core-reactions.graphql'

# Read both query documents into memory as plain text
query_comments = load_query(comments_query_path)
query_reactions = load_query(reactions_query_path)

# Define function to execute queries
def execute_query(query):
    """Parse ``query`` with gql and run it on the module-level ``client``.

    No variables are supplied; the response from ``client.execute`` is
    returned unchanged.
    """
    document = gql(query)
    response = client.execute(document)
    return response

# Execute each query once without variables.
# NOTE(review): result_comments / result_reactions are not read again in the
# code shown here — confirm these two requests are still needed.
result_comments = execute_query(query_comments)
result_reactions = execute_query(query_reactions)

##%%
# Extract data from reactions
def extract_reactions_data(client, query, keywords):
    """Collect reactions on issue comments that mention any of the keywords.

    Pages through ``repository.issues`` using ``issueCursor``. For every
    comment whose body contains one of ``keywords`` (case-insensitive
    substring match) one record is produced per reaction on that comment.

    Args:
        client: Object exposing ``execute(document, variable_values=...)``
            that returns the response as nested dicts.
        query: GraphQL query text; it is sent the ``issueCursor``,
            ``commentCursor`` and ``reactionCursor`` variables.
        keywords: Iterable of strings to look for in comment bodies.

    Returns:
        A list of ``{'type': 'Reaction: <content>', 'created_at': <value>}``
        dicts, where ``created_at`` is the *comment's* ``createdAt`` value.
    """
    # Parse the query and lowercase the keywords once, not per page/comment.
    document = gql(query)
    lowered_keywords = [keyword.lower() for keyword in keywords]

    reactions_data = []
    issue_cursor = None

    while True:
        # Only the issue connection is paginated. The nested cursors are always
        # sent as None, so only the comments/reactions returned with each issue
        # page are inspected.
        variables = {
            'issueCursor': issue_cursor,
            'commentCursor': None,
            'reactionCursor': None,
        }
        result = client.execute(document, variable_values=variables)

        # `or {}` / `or []` also cover explicit nulls in the response, which
        # `.get(key, default)` would pass through as None.
        issues_connection = ((result or {}).get('repository') or {}).get('issues') or {}

        for issue_edge in issues_connection.get('edges') or []:
            issue = (issue_edge or {}).get('node') or {}
            comment_edges = (issue.get('comments') or {}).get('edges') or []
            for comment_edge in comment_edges:
                comment = (comment_edge or {}).get('node') or {}
                comment_date = comment.get('createdAt') or ''
                if not comment_date:
                    continue
                body = (comment.get('body') or '').lower()
                if not any(keyword in body for keyword in lowered_keywords):
                    continue
                reaction_edges = (comment.get('reactions') or {}).get('edges') or []
                for reaction_edge in reaction_edges:
                    reaction = (reaction_edge or {}).get('node') or {}
                    reactions_data.append({
                        'type': f"Reaction: {reaction.get('content', '')}",
                        'created_at': comment_date,
                    })

        page_info = issues_connection.get('pageInfo') or {}
        issue_cursor = page_info.get('endCursor')
        # Stop on the last page, and also when no cursor is given: requesting
        # again with a None cursor would re-fetch the first page forever.
        if not page_info.get('hasNextPage') or issue_cursor is None:
            break

    return reactions_data

# Keywords looked up case-insensitively as substrings of each comment body
keywords = ['security', 'privacy', 'ethics', 'confidentiality', 'integrity']
# Extract reactions data for the comments that mention a keyword
reactions_data = extract_reactions_data(client, query_reactions, keywords)
# Bare expression: shows the list when it is the last statement of a notebook cell
reactions_data

##%%
def extract_comments_data(client, query, keywords):
    """Collect issue comments whose body mentions any of the keywords.

    Pages through ``repository.issues`` using ``issueCursor`` and records one
    entry for every comment that has a ``createdAt`` value and whose body
    contains one of ``keywords`` (case-insensitive substring match).

    Args:
        client: Object exposing ``execute(document, variable_values=...)``
            that returns the response as nested dicts.
        query: GraphQL query text; it is sent the ``issueCursor`` and
            ``commentCursor`` variables.
        keywords: Iterable of strings to look for in comment bodies.

    Returns:
        A list of ``{'type': 'Comment', 'created_at': <createdAt>}`` dicts.
    """
    # Parse the query and lowercase the keywords once, not per page/comment.
    document = gql(query)
    lowered_keywords = [keyword.lower() for keyword in keywords]

    comments_data = []
    issue_cursor = None

    while True:
        # commentCursor is deliberately always None. A cursor taken from one
        # issue's comment connection belongs to that issue only; sending it
        # with the next page of (different) issues would presumably skip or
        # misplace their comments. Comments beyond the ones returned with each
        # issue are therefore not fetched.
        variables = {
            'issueCursor': issue_cursor,
            'commentCursor': None,
        }
        result = client.execute(document, variable_values=variables)

        # `or {}` / `or []` also cover explicit nulls in the response, which
        # `.get(key, default)` would pass through as None.
        issues_connection = ((result or {}).get('repository') or {}).get('issues') or {}

        for issue_edge in issues_connection.get('edges') or []:
            issue = (issue_edge or {}).get('node') or {}
            comment_edges = (issue.get('comments') or {}).get('edges') or []
            for comment_edge in comment_edges:
                comment = (comment_edge or {}).get('node') or {}
                comment_date = comment.get('createdAt') or ''
                if not comment_date:
                    continue
                body = (comment.get('body') or '').lower()
                if any(keyword in body for keyword in lowered_keywords):
                    comments_data.append({'type': 'Comment', 'created_at': comment_date})

        page_info = issues_connection.get('pageInfo') or {}
        issue_cursor = page_info.get('endCursor')
        # Stop on the last page, and also when no cursor is given: requesting
        # again with a None cursor would re-fetch the first page forever.
        if not page_info.get('hasNextPage') or issue_cursor is None:
            break

    return comments_data

# Extract comments data: one record per comment whose body mentions a keyword
comments_data = extract_comments_data(client, query_comments, keywords)

##%%
def analyze_and_plot_all(data_list):
    """Plot monthly counts of interactions, one line per interaction type.

    Args:
        data_list: List of dicts carrying a ``'type'`` label and a
            ``'created_at'`` timestamp string. Rows whose timestamp is not a
            parseable string are ignored.

    Returns:
        None. A diagnostic is printed and the function returns early when a
        required column is missing or no row has a usable timestamp;
        otherwise the figure is shown.
    """
    df = pd.DataFrame(data_list)

    # Both columns are needed below; report and stop instead of raising.
    for required in ('created_at', 'type'):
        if required not in df.columns:
            print(f"Error: '{required}' column not found in the data")
            print("Data preview:")
            print(df.head())
            return

    # Parse only string timestamps; unparseable strings become NaT and are
    # dropped. utc=True normalises every value to UTC.
    text_rows = df['created_at'].map(lambda value: isinstance(value, str)).astype(bool)
    parsed = pd.to_datetime(
        df.loc[text_rows, 'created_at'], errors='coerce', utc=True
    ).dropna()
    if parsed.empty:
        print("No rows with a valid 'created_at' timestamp; nothing to plot.")
        return

    # Build a new frame (no write into a filtered view) and drop the timezone,
    # since monthly periods are timezone-naive.
    df = df.loc[parsed.index].assign(created_at=parsed.dt.tz_localize(None))

    # Count rows per (month, type) and pivot the *type* level into columns so
    # that months form the rows.
    months = df['created_at'].dt.to_period('M')
    monthly_counts = df.groupby([months, 'type']).size().unstack(fill_value=0)

    # Insert months without any interaction as zero rows so the x axis is contiguous.
    all_months = pd.period_range(months.min(), months.max(), freq='M')
    monthly_counts = monthly_counts.reindex(all_months, fill_value=0)
    monthly_counts.index = monthly_counts.index.astype(str)
    monthly_counts.index.name = 'Month'

    # Plotting
    _, ax = plt.subplots(figsize=(12, 8))
    monthly_counts.plot(ax=ax, marker='o', linestyle='-')
    plt.title('Monthly Interactions Containing Ethical Keywords')
    plt.xlabel('Year/Month')
    plt.ylabel('Number of Interactions')
    plt.legend(title='Interaction Type')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# Combine comment and reaction records into one list and plot them together
all_data = comments_data + reactions_data
analyze_and_plot_all(all_data)

##%%


ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

In [None]:
#!pip install pandas
#!pip install requests
#!pip install gql
#!pip install python_dotenv
#!pip install requests-toolbelt
#!pip install matplotlib
import pandas as pd
from dotenv import load_dotenv
import os
from gql.transport.requests import RequestsHTTPTransport
from gql import Client, gql
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Load the GitHub token from the .env file
load_dotenv("../.env")
token = os.getenv('GH_TOKEN')
# Reject an empty value as well as a missing one: a `GH_TOKEN=` line yields ''
# which would otherwise be sent as an empty credential in the Authorization header.
if not token:
    raise ValueError("GitHub token is not set. Check your .env file.")

# Initialize the GraphQL client
def initialize_client():
    """Build a gql ``Client`` pointed at the GitHub GraphQL endpoint.

    The request is authenticated with the module-level ``token`` and the
    client is configured to fetch the schema from the transport.
    """
    auth_headers = {'Authorization': f'token {token}'}
    github_transport = RequestsHTTPTransport(
        url='https://api.github.com/graphql',
        headers=auth_headers,
    )
    github_client = Client(
        transport=github_transport,
        fetch_schema_from_transport=True,
    )
    return github_client

# Shared client instance: read as a global by execute_query and passed
# explicitly to the extract_* helpers below.
client = initialize_client()

In [None]:
# Load GraphQL query from file
def load_query(file_path):
    """Return the full text of the GraphQL query stored at ``file_path``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default encoding.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

# Paths to the GraphQL query files (relative paths, resolved against the
# current working directory)
comments_query_path = '../queries/bitcoin-core-comments.graphql'
reactions_query_path = '../queries/bitcoin-core-reactions.graphql'

# Read both query documents into memory as plain text
query_comments = load_query(comments_query_path)
query_reactions = load_query(reactions_query_path)

# Define function to execute queries
def execute_query(query):
    """Parse ``query`` with gql and run it on the module-level ``client``.

    No variables are supplied; the response from ``client.execute`` is
    returned unchanged.
    """
    document = gql(query)
    response = client.execute(document)
    return response

# Execute each query once without variables.
# NOTE(review): result_comments / result_reactions are not read again in the
# code shown here — confirm these two requests are still needed.
result_comments = execute_query(query_comments)
result_reactions = execute_query(query_reactions)

In [None]:
# Extract data from reactions
def extract_reactions_data(client, query, keywords):
    """Collect reactions on issue comments that mention any of the keywords.

    Pages through ``repository.issues`` using ``issueCursor``. For every
    comment whose body contains one of ``keywords`` (case-insensitive
    substring match) one record is produced per reaction on that comment.

    Args:
        client: Object exposing ``execute(document, variable_values=...)``
            that returns the response as nested dicts.
        query: GraphQL query text; it is sent the ``issueCursor``,
            ``commentCursor`` and ``reactionCursor`` variables.
        keywords: Iterable of strings to look for in comment bodies.

    Returns:
        A list of ``{'type': 'Reaction: <content>', 'created_at': <value>}``
        dicts, where ``created_at`` is the *comment's* ``createdAt`` value.
    """
    # Parse the query and lowercase the keywords once, not per page/comment.
    document = gql(query)
    lowered_keywords = [keyword.lower() for keyword in keywords]

    reactions_data = []
    issue_cursor = None

    while True:
        # Only the issue connection is paginated. The nested cursors are always
        # sent as None, so only the comments/reactions returned with each issue
        # page are inspected.
        variables = {
            'issueCursor': issue_cursor,
            'commentCursor': None,
            'reactionCursor': None,
        }
        result = client.execute(document, variable_values=variables)

        # `or {}` / `or []` also cover explicit nulls in the response, which
        # `.get(key, default)` would pass through as None.
        issues_connection = ((result or {}).get('repository') or {}).get('issues') or {}

        for issue_edge in issues_connection.get('edges') or []:
            issue = (issue_edge or {}).get('node') or {}
            comment_edges = (issue.get('comments') or {}).get('edges') or []
            for comment_edge in comment_edges:
                comment = (comment_edge or {}).get('node') or {}
                comment_date = comment.get('createdAt') or ''
                if not comment_date:
                    continue
                body = (comment.get('body') or '').lower()
                if not any(keyword in body for keyword in lowered_keywords):
                    continue
                reaction_edges = (comment.get('reactions') or {}).get('edges') or []
                for reaction_edge in reaction_edges:
                    reaction = (reaction_edge or {}).get('node') or {}
                    reactions_data.append({
                        'type': f"Reaction: {reaction.get('content', '')}",
                        'created_at': comment_date,
                    })

        page_info = issues_connection.get('pageInfo') or {}
        issue_cursor = page_info.get('endCursor')
        # Stop on the last page, and also when no cursor is given: requesting
        # again with a None cursor would re-fetch the first page forever.
        if not page_info.get('hasNextPage') or issue_cursor is None:
            break

    return reactions_data

# Keywords looked up case-insensitively as substrings of each comment body
keywords = ['security', 'privacy', 'ethics', 'confidentiality', 'integrity']
# Extract reactions data for the comments that mention a keyword
reactions_data = extract_reactions_data(client, query_reactions, keywords)
# Bare expression: shows the list when it is the last statement of a notebook cell
reactions_data

In [None]:
def extract_comments_data(client, query, keywords):
    """Collect issue comments whose body mentions any of the keywords.

    Pages through ``repository.issues`` using ``issueCursor`` and records one
    entry for every comment that has a ``createdAt`` value and whose body
    contains one of ``keywords`` (case-insensitive substring match).

    Args:
        client: Object exposing ``execute(document, variable_values=...)``
            that returns the response as nested dicts.
        query: GraphQL query text; it is sent the ``issueCursor`` and
            ``commentCursor`` variables.
        keywords: Iterable of strings to look for in comment bodies.

    Returns:
        A list of ``{'type': 'Comment', 'created_at': <createdAt>}`` dicts.
    """
    # Parse the query and lowercase the keywords once, not per page/comment.
    document = gql(query)
    lowered_keywords = [keyword.lower() for keyword in keywords]

    comments_data = []
    issue_cursor = None

    while True:
        # commentCursor is deliberately always None. A cursor taken from one
        # issue's comment connection belongs to that issue only; sending it
        # with the next page of (different) issues would presumably skip or
        # misplace their comments. Comments beyond the ones returned with each
        # issue are therefore not fetched.
        variables = {
            'issueCursor': issue_cursor,
            'commentCursor': None,
        }
        result = client.execute(document, variable_values=variables)

        # `or {}` / `or []` also cover explicit nulls in the response, which
        # `.get(key, default)` would pass through as None.
        issues_connection = ((result or {}).get('repository') or {}).get('issues') or {}

        for issue_edge in issues_connection.get('edges') or []:
            issue = (issue_edge or {}).get('node') or {}
            comment_edges = (issue.get('comments') or {}).get('edges') or []
            # No per-comment debug printing here: dumping every comment floods
            # the notebook output.
            for comment_edge in comment_edges:
                comment = (comment_edge or {}).get('node') or {}
                comment_date = comment.get('createdAt') or ''
                if not comment_date:
                    continue
                body = (comment.get('body') or '').lower()
                if any(keyword in body for keyword in lowered_keywords):
                    comments_data.append({'type': 'Comment', 'created_at': comment_date})

        page_info = issues_connection.get('pageInfo') or {}
        issue_cursor = page_info.get('endCursor')
        # Stop on the last page, and also when no cursor is given: requesting
        # again with a None cursor would re-fetch the first page forever.
        if not page_info.get('hasNextPage') or issue_cursor is None:
            break

    return comments_data


# Extract comments data: one record per comment whose body mentions a keyword
comments_data = extract_comments_data(client, query_comments, keywords)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def analyze_and_plot_all(data_list):
    """Plot monthly counts of interactions, one line per interaction type.

    Args:
        data_list: List of dicts carrying a ``'type'`` label and a
            ``'created_at'`` timestamp string. Rows whose timestamp is not a
            parseable string are ignored.

    Returns:
        None. A diagnostic is printed and the function returns early when a
        required column is missing or no row has a usable timestamp;
        otherwise the figure is shown.
    """
    df = pd.DataFrame(data_list)

    # Both columns are needed below; report and stop instead of raising.
    for required in ('created_at', 'type'):
        if required not in df.columns:
            print(f"Error: '{required}' column not found in the data")
            print("Data preview:")
            print(df.head())
            return

    # Parse only string timestamps; unparseable strings become NaT and are
    # dropped. utc=True normalises every value to UTC.
    text_rows = df['created_at'].map(lambda value: isinstance(value, str)).astype(bool)
    parsed = pd.to_datetime(
        df.loc[text_rows, 'created_at'], errors='coerce', utc=True
    ).dropna()
    if parsed.empty:
        print("No rows with a valid 'created_at' timestamp; nothing to plot.")
        return

    # Build a new frame (no write into a filtered view) and drop the timezone,
    # since monthly periods are timezone-naive.
    df = df.loc[parsed.index].assign(created_at=parsed.dt.tz_localize(None))

    # Count rows per (month, type) and pivot the *type* level into columns so
    # that months form the rows.
    months = df['created_at'].dt.to_period('M')
    monthly_counts = df.groupby([months, 'type']).size().unstack(fill_value=0)

    # Insert months without any interaction as zero rows so the x axis is contiguous.
    all_months = pd.period_range(months.min(), months.max(), freq='M')
    monthly_counts = monthly_counts.reindex(all_months, fill_value=0)
    monthly_counts.index = monthly_counts.index.astype(str)
    monthly_counts.index.name = 'Month'

    # Plotting
    _, ax = plt.subplots(figsize=(12, 8))
    monthly_counts.plot(ax=ax, marker='o', linestyle='-')
    plt.title('Monthly Interactions Containing Ethical Keywords')
    plt.xlabel('Year/Month')
    plt.ylabel('Number of Interactions')
    plt.legend(title='Interaction Type')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# Combine data and plot.
# The combined list is what gets plotted; previously only reactions_data was
# passed, leaving all_data unused and the comment records off the chart.
all_data = comments_data + reactions_data
analyze_and_plot_all(all_data)
