# Issue Classification Example

In this example we perform a logistic regression on a dataset of GitHub issues to predict the labels of newly entered tickets.

In a second step we try to find issues that deal with similar problems.

In [None]:
# required libraries
from github import Github
import os
import pandas as pd
import numpy as np
import pickle

In [28]:

# Data Preparation 

## Retrieve the issues from GitHub

# Set to True to ignore the local cache and query GitHub again.
force_fetch = False

# Cache files written after a fetch (paths are relative to the notebook).
issues_cache = '../data/issues.pkl'
labels_cache = '../data/labels.pkl'

# Column layout of the issue table. Defined outside the branches below so it
# exists on a fresh kernel whether or not a fetch happens.
cols = columns = ['number', 'title', 'body', 'labels', 'state']

# The token is read from the environment so it never appears in the notebook.
# os.getenv returns None when GH_API_ACCESS is unset.
access_token = USER = os.getenv('GH_API_ACCESS')
token = Github(access_token)
repo = token.get_repo('quarkusio/quarkus')


def build_issue_frame(issue_list):
    """Flatten issue objects into a DataFrame and collect their label names.

    Parameters
    ----------
    issue_list : iterable
        Objects exposing ``number``, ``title``, ``body``, ``state`` and a
        ``labels`` iterable whose items have a ``name`` attribute.

    Returns
    -------
    (pandas.DataFrame, set)
        One row per issue with the columns listed in ``cols`` (the ``labels``
        cell holds a list of label names), and the set of every label name seen.
    """
    records = []
    seen_labels = set()
    for issue in issue_list:
        label_names = [label.name for label in issue.labels]
        seen_labels.update(label_names)
        records.append([issue.number, issue.title, issue.body, label_names, issue.state])
    # Build the frame once from all rows instead of concatenating inside the loop.
    return pd.DataFrame(records, columns=cols), seen_labels


# Fetch the issues if no cache exists (or a refresh is forced), otherwise reuse the cache.
if force_fetch or not os.path.exists(issues_cache):
    issues = repo.get_issues(state='open')
    df, unique_labels = build_issue_frame(issues)

    with open(issues_cache, 'wb') as fh:
        pickle.dump(issues, fh)
    with open(labels_cache, 'wb') as fh:
        pickle.dump(unique_labels, fh)
else:
    print("Loading issues from file...")
    # SECURITY: pickle.load can execute arbitrary code; only load cache files
    # that this notebook produced itself.
    with open(issues_cache, 'rb') as fh:
        issues = pickle.load(fh)
    # Rebuild the table and label set from the cached issues so that `df` is
    # defined on this path too (previously it only existed after a fetch).
    # NOTE(review): assumes the unpickled list can be iterated without new API
    # requests because it was fully iterated before being dumped — confirm.
    df, unique_labels = build_issue_frame(issues)

# let's see what we have
# len(df) counts the rows actually loaded and needs no further API request.
print("Number of issues in total: ", len(df))
print("Unique labels ({0})".format(len(unique_labels)))
df.head()


Loading issues from file...
Number of issues in total:  2290
Unique labels (142)


Unnamed: 0,number,title,body,labels,state
0,35635,Refactor Hibernate config mapping to use group...,Fixes #35631\r\n\r\nFollows up on commit ad94a...,"[area/persistence, area/hibernate-orm, area/hi...",open
1,35634,Extends ServerInterceptor not working,### Describe the bug\r\n\r\n1. define an `inte...,"[kind/bug, triage/needs-triage]",open
2,35633,Make hibernate reactive status clear in docs,seem we missed the regular inclusion of status...,"[area/documentation, triage/backport-2.13?, tr...",open
3,35631,Quarkus 3.2: Hibernate ORM configuration probl...,### Describe the bug\n\nI'm upgrading to Quark...,"[kind/bug, area/persistence, area/hibernate-orm]",open
4,35630,Fix doc link asciidoc change link to xref wher...,This PR fixes some broken 404s identified duri...,"[area/documentation, triage/backport?, area/do...",open


In [20]:
# Preview the first rows of the issue table.
# NOTE(review): the output recorded for this cell is a bare number rather than a
# table, and its execution count is out of sequence; the cell was probably edited
# after it last ran. Re-run the notebook top to bottom to refresh it.
df.head()

2290