In [6]:
pip install requests pymongo scikit-learn




In [7]:
!pip install streamlit



In [8]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def train_logistic_regression_model(username, repo_data):
    """Train a logistic-regression classifier on repository descriptions.

    Every repository description is converted to a TF-IDF vector and labelled
    1 when the repository has at least one star and 0 otherwise. The data is
    split 80/20 (fixed seed), the model is fitted on the training part and its
    accuracy on the held-out part is printed.

    Args:
        username: GitHub login the repositories belong to. Kept for interface
            compatibility; features and labels are both derived from
            ``repo_data``.
        repo_data: Sequence of repository dicts. The ``description`` and
            ``stargazers_count`` entries are read; missing or ``None`` values
            are treated as an empty description / zero stars.

    Returns:
        The fitted ``LogisticRegression`` model. The fitted ``TfidfVectorizer``
        is attached to it as ``model.vectorizer_`` so that prediction code can
        transform new descriptions into the same feature space.

    Raises:
        ValueError: If ``repo_data`` does not contain both starred and
            unstarred repositories (a classifier needs two classes), or if
            scikit-learn rejects the data (e.g. no usable vocabulary).
    """
    descriptions = [repo.get('description') or '' for repo in repo_data]

    # Derive the labels from the same records as the features so that row i of
    # the feature matrix always corresponds to label i, without a second fetch.
    labels = [
        1 if (repo.get('stargazers_count') or 0) > 0 else 0
        for repo in repo_data
    ]

    # Fail early with a readable message instead of an error from deep inside
    # the solver when every repository carries the same label.
    if len(set(labels)) < 2:
        raise ValueError(
            "Need at least one starred and one unstarred repository to train "
            f"a classifier (got {len(labels)} repositories with a single label)."
        )

    vectorizer = TfidfVectorizer()
    repo_matrix = vectorizer.fit_transform(descriptions)

    X_train, X_test, y_train, y_test = train_test_split(
        repo_matrix, labels, test_size=0.2, random_state=42
    )

    model = LogisticRegression()
    model.fit(X_train, y_train)

    # Report accuracy on the held-out split.
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f"Model Accuracy: {accuracy}")

    # Keep the fitted vocabulary next to the model; the model's coefficients
    # are only meaningful for vectors produced by this exact vectorizer.
    model.vectorizer_ = vectorizer

    return model


In [9]:
import streamlit as st

def main():
    """Render the Streamlit page for analysing a GitHub user.

    Once the "Analyze" button is pressed with a non-empty username, the page
    fetches the user and repository records, stores them through the MongoDB
    helpers, shows basic user information, trains the recommendation model on
    the repositories and lists the recommended ones.

    The fetch/save helpers and ``recommend_repositories_logistic_regression``
    are defined elsewhere and must exist in the namespace before the button
    is pressed.
    """
    st.title("GitHub Analyzer with Logistic Regression Recommendation")

    # Get GitHub username from the user
    username = st.text_input("Enter GitHub Username:")
    if not st.button("Analyze"):
        return

    # Do not call the fetchers with a blank or whitespace-only login.
    username = (username or "").strip()
    if not username:
        st.warning("Please enter a GitHub username.")
        return

    user_data = fetch_user_data(username)
    repo_data = fetch_repo_data(username)

    # Save data to MongoDB
    db = connect_to_mongo()
    save_user_data(db, user_data)
    save_repo_data(db, repo_data)

    # Display results
    st.subheader("User Information")
    st.write(f"Username: {user_data['login']}")
    # `or` also covers an 'email' key that is present but None/empty, which
    # dict.get's default alone would render as "None".
    st.write(f"Email: {user_data.get('email') or 'N/A'}")

    st.subheader("Top 5 Recommended Repositories")
    try:
        model = train_logistic_regression_model(username, repo_data)
    except ValueError as exc:
        # Training can be impossible for some accounts (for example too few
        # repositories); report that instead of aborting the whole page.
        st.warning(f"Could not train a recommendation model: {exc}")
        return

    recommended_repositories = recommend_repositories_logistic_regression(username, repo_data, model)
    for repo in recommended_repositories:
        st.write(f"- {repo['name']}")

# Entry-point guard: call main() only when this code runs as the __main__
# module (a directly executed script or a notebook cell), not when imported.
if __name__ == "__main__":
    main()


In [10]:
def recommend_repositories_logistic_regression(username, repo_data, model, vectorizer=None, top_n=5):
    """Return the first repositories the model predicts the user will like.

    Args:
        username: GitHub login the recommendation is for. Not used by the
            computation; kept for interface compatibility.
        repo_data: Sequence of repository dicts; the ``description`` entry is
            read (missing or ``None`` is treated as an empty string).
        model: Fitted classifier exposing ``predict``; a prediction of 1 marks
            a repository as recommended.
        vectorizer: Optional fitted vectorizer exposing ``transform``. When
            omitted, a ``vectorizer_`` attribute on ``model`` is used if one
            exists. Only when neither is available is a new ``TfidfVectorizer``
            fitted on ``repo_data``; that matches the model's feature space
            only if ``repo_data`` is the corpus the model was trained on.
        top_n: Maximum number of repositories to return (default 5).

    Returns:
        A list of at most ``top_n`` repository dicts, in their original order.
        An empty list is returned for empty ``repo_data``.
    """
    # Nothing to score; also avoids fitting or predicting on an empty corpus.
    if not repo_data:
        return []

    descriptions = [repo.get('description') or '' for repo in repo_data]

    if vectorizer is None:
        vectorizer = getattr(model, 'vectorizer_', None)

    if vectorizer is not None:
        # Reuse the training vocabulary so column j means the same term the
        # model's coefficient j was learned for.
        repo_matrix = vectorizer.transform(descriptions)
    else:
        # Fallback: fit a fresh vocabulary on the descriptions being scored.
        repo_matrix = TfidfVectorizer().fit_transform(descriptions)

    # Predict whether the user will like each repository
    predictions = model.predict(repo_matrix)

    liked_repositories = [
        repo for repo, prediction in zip(repo_data, predictions) if prediction == 1
    ]
    return liked_repositories[:top_n]
