<a href="https://colab.research.google.com/github/azizbramli/stage/blob/main/Copie_de_aziz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Seed examples: every entry pairs a code snippet with the review label it should receive.
_seed_pairs = [
    # Typed signature vs. the same function without annotations.
    (
        'def add(a: int, b: int) -> int:\n    return a + b',
        'Best Practice: Type hints are used for clarity.',
    ),
    (
        'def add(a, b):\n    return a + b',
        'Not Best Practice: Missing type hints.',
    ),
    # Documented function vs. the same function without a docstring.
    (
        'def multiply(a: int, b: int) -> int:\n    """Multiplies two integers and returns the result."""\n    return a * b',
        'Best Practice: Includes a docstring explaining the function.',
    ),
    (
        'def multiply(a, b):\n    return a * b',
        'Not Best Practice: Missing docstring.',
    ),
    # Division that logs the zero-divisor case vs. division with no guard at all.
    (
        'import logging\n\ndef divide(a: int, b: int) -> float:\n    if b == 0:\n        logging.error("Division by zero attempt")\n        return float(\'inf\')\n    return a / b',
        'Best Practice: Uses logging for error handling.',
    ),
    (
        'def divide(a, b):\n    return a / b',
        'Not Best Practice: Lacks error handling.',
    ),
]

# Column-oriented view of the same pairs, in the order listed above.
feedback_data = {
    'code': [snippet for snippet, _ in _seed_pairs],
    'review': [label for _, label in _seed_pairs],
}

# Build the two-column table and write it out without the index column.
df_feedback = pd.DataFrame(feedback_data)
df_feedback.to_csv('user_feedback.csv', index=False)
print("User feedback data saved to user_feedback.csv.")


User feedback data saved to user_feedback.csv.


In [None]:
import pandas as pd
import os
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split  # Importing the function needed

# File paths (relative, so they resolve against the current working directory).
# feedback_file: CSV with 'code' and 'review' columns; save_feedback() appends
#                rows to it and retrain_model() reads it back.
# model_file:    destination of the fitted pipeline written by joblib.dump()
#                in retrain_model().
feedback_file = 'user_feedback.csv'
model_file = 'text_classification_model.joblib'

def save_feedback(code_snippet, review):
    """Append one feedback row to the feedback CSV, then retrain the model.

    Parameters
    ----------
    code_snippet : str
        The source code the feedback refers to.
    review : str
        The label/review text to store for that snippet.

    Side effects
    ------------
    Appends a row to `feedback_file` (creating the file if needed), prints a
    confirmation, and calls `retrain_model()`.
    """
    row = pd.DataFrame({'code': [code_snippet], 'review': [review]})

    # The header is needed whenever the file has no content yet. Checking only
    # for existence would leave a zero-byte file without a header, and
    # pd.read_csv would then treat the first feedback row as column names.
    needs_header = (
        not os.path.exists(feedback_file)
        or os.path.getsize(feedback_file) == 0
    )
    # mode='a' creates the file when it is missing, so one call covers both cases.
    row.to_csv(feedback_file, mode='a', header=needs_header, index=False)

    print("Feedback saved successfully.")
    retrain_model()

def retrain_model():
    """Retrain the text classifier on the feedback CSV and save it to disk.

    Reads `feedback_file`, fits a TF-IDF + logistic-regression pipeline that
    maps code snippets ('code') to review labels ('review'), and stores the
    fitted pipeline at `model_file` with joblib.

    Prints a message and returns without touching the saved model when the
    feedback file is missing or contains fewer than two distinct labels.
    """
    if not os.path.exists(feedback_file):
        print("No feedback data available to retrain the model.")
        return

    # Blank CSV fields are read back as NaN, which TfidfVectorizer rejects,
    # so rows with a missing snippet or label are dropped before training.
    df = pd.read_csv(feedback_file).dropna(subset=['code', 'review'])
    X = df['code'].astype(str)
    y = df['review'].astype(str)

    # A classifier cannot be fitted on a single class (or on no rows at all).
    if y.nunique() < 2:
        print("Not enough labelled feedback to retrain the model.")
        return

    # 20% of the rows are held out exactly as before; the held-out part is not
    # evaluated here, so it is discarded instead of being bound to unused names.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    # With very few rows the split can leave one label in the training part;
    # fall back to every row rather than failing inside LogisticRegression.
    if y_train.nunique() < 2:
        X_train, y_train = X, y

    model = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=1000))
    model.fit(X_train, y_train)

    joblib.dump(model, model_file)
    print("Model retrained and saved successfully.")

# Interactive demo: prompt for one snippet and its label, then store it and retrain.
if __name__ == "__main__":
    # The tuple is evaluated left to right, so the snippet prompt appears first.
    code_input, feedback_input = (
        input("Enter code snippet: "),
        input("Enter feedback (Best Practice / Not Best Practice): "),
    )
    save_feedback(code_input, feedback_input)


Enter code snippet:  a
Enter feedback (Best Practice / Not Best Practice):  a


Feedback saved successfully.
Model retrained and saved successfully.


In [None]:
import joblib

# Load the fitted pipeline from disk. The file name matches the one the
# training cell writes with joblib.dump(), so that cell must have run first.
# NOTE: joblib.load unpickles the file; only load model files you trust.
model = joblib.load('text_classification_model.joblib')

# Prediction helper built on the module-level `model` loaded above.
def predict_best_practice(code_snippet):
    """Return the label that `model` predicts for a single code snippet.

    `model.predict` works on a batch, so the snippet is wrapped in a
    one-element list and the single prediction is unwrapped again.
    """
    batch = [code_snippet]
    return model.predict(batch)[0]

# Example usage: classify a snippet that is not valid Python.
# The result is still one of the labels the model was trained on; the
# classifier has no way to answer "this is not code".
new_code = 'def sum(int a,))) return b;'
print(f"Prediction: {predict_best_practice(new_code)}")


Prediction: Best Practice: Includes a docstring explaining the function.


In [None]:
import pandas as pd
import os
import joblib
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

# File paths (all relative, so they resolve against the current working directory).
# feedback_file:      CSV with 'code' and 'review' columns, appended to by
#                     save_feedback() and read by retrain_model().
# model_file:         destination of the fitted pipeline written by joblib.dump().
# codesearchnet_file: pickle read by load_codesearchnet_data() on every retrain.
feedback_file = 'user_feedback.csv'
model_file = 'text_classification_model.joblib'
codesearchnet_file = 'Desktop/java_dedupe_definitions_v2.pkl'  # Path to your pickled dataset; adjust to where the file lives

def load_codesearchnet_data(filepath):
    """Load code snippets from a pickled dataset as labelled training rows.

    Parameters
    ----------
    filepath : str or path-like
        Location of the pickle file. The pickle must contain either a
        pandas DataFrame or a list of records exposing a 'code' field.

    Returns
    -------
    pandas.DataFrame
        Two columns: 'code' (the snippets) and 'review', which is filled with
        the constant placeholder label 'Best Practice'.

    Raises
    ------
    TypeError
        If the unpickled object is neither a DataFrame nor a list
        (a plain dictionary is rejected as well).
    ValueError
        If the data has no 'code' column/field.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point this function at dataset files obtained from a trusted source.
    with open(filepath, 'rb') as file:
        data = pickle.load(file)

    if isinstance(data, pd.DataFrame):
        df = data
    elif isinstance(data, list):
        if data and all(isinstance(record, dict) for record in data):
            # Only the 'code' field is used, so pull just that field instead of
            # expanding every key of every record into a full-width DataFrame.
            # On a large dataset that avoids a big, immediately discarded copy.
            if not any('code' in record for record in data):
                raise ValueError("'code' column not found in the dataset.")
            missing = float('nan')  # same placeholder pandas uses for absent keys
            df = pd.DataFrame(
                {'code': [record.get('code', missing) for record in data]}
            )
        else:
            # Lists of other record types keep pandas' own conversion rules.
            df = pd.DataFrame(data)
    else:
        raise TypeError("Unexpected data type in the pickled file.")

    if 'code' not in df.columns:
        raise ValueError("'code' column not found in the dataset.")

    # assign() returns a new frame, so the label is never written onto a slice
    # of the unpickled object. 'Best Practice' is only a default label; adjust
    # it if the dataset should be labelled differently.
    return df[['code']].assign(review='Best Practice')

def save_feedback(code_snippet, review):
    """Append one feedback row to the feedback CSV, then retrain the model.

    Parameters
    ----------
    code_snippet : str
        The source code the feedback refers to.
    review : str
        The label/review text to store for that snippet.

    Side effects
    ------------
    Appends a row to `feedback_file` (creating the file if needed), prints a
    confirmation, and calls `retrain_model()`.
    """
    row = pd.DataFrame({'code': [code_snippet], 'review': [review]})

    # The header is needed whenever the file has no content yet. Checking only
    # for existence would leave a zero-byte file without a header, and
    # pd.read_csv would then treat the first feedback row as column names.
    needs_header = (
        not os.path.exists(feedback_file)
        or os.path.getsize(feedback_file) == 0
    )
    # mode='a' creates the file when it is missing, so one call covers both cases.
    row.to_csv(feedback_file, mode='a', header=needs_header, index=False)

    print("Feedback saved successfully.")
    retrain_model()

def retrain_model():
    """Retrain the model on user feedback combined with the CodeSearchNet data.

    Reads `feedback_file` when it exists (otherwise starts from an empty
    feedback table), loads the dataset at `codesearchnet_file` through
    `load_codesearchnet_data`, fits a TF-IDF + logistic-regression pipeline on
    the combined rows, and stores it at `model_file` with joblib.

    Prints a message and returns without touching the saved model when the
    combined data contains fewer than two distinct labels.

    Note: the dataset is reloaded from disk on every call.
    """
    if os.path.exists(feedback_file):
        df_feedback = pd.read_csv(feedback_file)
    else:
        df_feedback = pd.DataFrame(columns=['code', 'review'])

    # Load CodeSearchNet data
    df_codesearchnet = load_codesearchnet_data(codesearchnet_file)

    # Combine feedback with CodeSearchNet data
    df = pd.concat([df_feedback, df_codesearchnet], ignore_index=True)

    # Blank CSV fields and records without code show up as NaN, which
    # TfidfVectorizer rejects, so incomplete rows are dropped before training.
    df = df.dropna(subset=['code', 'review'])
    X = df['code'].astype(str)
    y = df['review'].astype(str)

    # A classifier cannot be fitted on a single class (or on no rows at all).
    if y.nunique() < 2:
        print("Not enough labelled data to retrain the model.")
        return

    # 20% of the rows are held out exactly as before; the held-out part is not
    # evaluated here, so it is discarded instead of being bound to unused names.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    # The split can leave one label in the training part when a label is rare;
    # fall back to every row rather than failing inside LogisticRegression.
    if y_train.nunique() < 2:
        X_train, y_train = X, y

    model = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=1000))
    model.fit(X_train, y_train)

    joblib.dump(model, model_file)
    print("Model retrained and saved successfully.")

# Interactive demo: prompt for one snippet and its label, then store it and retrain.
if __name__ == "__main__":
    # The tuple is evaluated left to right, so the snippet prompt appears first.
    code_input, feedback_input = (
        input("Enter code snippet: "),
        input("Enter feedback (Best Practice / Not Best Practice): "),
    )
    save_feedback(code_input, feedback_input)


Enter code snippet:  a
Enter feedback (Best Practice / Not Best Practice):  a


Feedback saved successfully.


MemoryError: 