In [1]:
import pandas as pd
from datetime import timedelta

def load_and_merge_data(source_file, destination_file, output_file):
    """Add last week's actual value and the sentiment label to each row.

    Reads both CSV files, adds a ``Previous Actual Values`` column to the
    destination data (the ``True Values`` entry of the first row dated
    exactly one week earlier, or missing when no such row exists),
    left-joins the ``Sentiment`` column of the source data on ``Date`` and
    writes the result to *output_file* without the index column.

    Args:
        source_file: Path of a CSV with at least ``Date`` and ``Sentiment``
            columns.
        destination_file: Path of a CSV with at least ``Date`` and
            ``True Values`` columns.
        output_file: Path the merged CSV is written to.
    """
    source_df = pd.read_csv(source_file)
    destination_df = pd.read_csv(destination_file)

    source_df['Date'] = pd.to_datetime(source_df['Date'])
    destination_df['Date'] = pd.to_datetime(destination_df['Date'])

    # Build one date-keyed lookup instead of re-filtering the whole frame for
    # every row. keep='first' mirrors taking the first matching row when a
    # date repeats; undated rows are left out so a missing date never matches.
    actual_by_date = (
        destination_df.dropna(subset=['Date'])
        .drop_duplicates(subset='Date', keep='first')
        .set_index('Date')['True Values']
    )
    previous_dates = destination_df['Date'] - timedelta(weeks=1)
    destination_df['Previous Actual Values'] = previous_dates.map(actual_by_date)

    # Left join: every prediction row is kept even when no sentiment exists
    # for its date.
    destination_df = pd.merge(
        destination_df, source_df[['Date', 'Sentiment']], on="Date", how="left"
    )
    destination_df.to_csv(output_file, index=False)
    print("Merged file created successfully at:", output_file)

def preprocess_data(input_file, output_file):
    """Keep one sentiment-labelled row per date and save the result.

    Rows whose ``Sentiment`` is missing are discarded first; among the
    remaining rows only the first occurrence of each ``Date`` is kept. The
    cleaned table is written to *output_file* without the index column.
    """
    cleaned = (
        pd.read_csv(input_file)
        .dropna(subset=['Sentiment'])
        .drop_duplicates(subset='Date', keep='first')
    )
    cleaned.to_csv(output_file, index=False)
    print("Data saved successfully at:", output_file)

def add_verify_column(df, *, neutral_tolerance=0.1):
    """Mark whether each prediction moved the way its sentiment label says.

    The frame is modified in place (``Date`` is converted to datetime and a
    ``Verify`` column is added) and also returned. ``Verify`` is:

    * ``'Correct'`` / ``'Wrong'`` for rows that have a
      ``Previous Actual Values`` entry and a ``Sentiment`` of ``'Positive'``
      (prediction above the previous actual), ``'Negative'`` (prediction
      below it) or ``'Neutral'`` (prediction within *neutral_tolerance* of
      it);
    * ``""`` for every other row.

    Args:
        df: Frame with ``Date``, ``Sentiment``, ``Predicted Values`` and
            ``Previous Actual Values`` columns.
        neutral_tolerance: Largest absolute difference (exclusive) that still
            counts as "no movement" for ``'Neutral'`` rows.

    Returns:
        The same ``df`` object, with the ``Verify`` column filled in.
    """
    df['Date'] = pd.to_datetime(df['Date'])
    df['Verify'] = ""

    sentiment = df['Sentiment']
    predicted = df['Predicted Values']
    previous = df['Previous Actual Values']

    # Comparisons against a missing value evaluate to False, so a row with a
    # missing prediction is graded 'Wrong' rather than skipped.
    moved_as_labelled = (
        ((sentiment == 'Positive') & (predicted > previous))
        | ((sentiment == 'Negative') & (predicted < previous))
        | ((sentiment == 'Neutral')
           & ((predicted - previous).abs() < neutral_tolerance))
    )
    # Only rows with a baseline value and a recognised label are graded.
    judged = previous.notna() & sentiment.isin(['Positive', 'Negative', 'Neutral'])

    df.loc[judged & moved_as_labelled, 'Verify'] = 'Correct'
    df.loc[judged & ~moved_as_labelled, 'Verify'] = 'Wrong'

    return df

def calculate_accuracy(df):
    """Return the percentage of graded rows whose ``Verify`` is ``'Correct'``.

    Only rows marked ``'Correct'`` or ``'Wrong'`` count towards the total;
    when there are none the result is ``0``.
    """
    verdicts = df['Verify']
    n_correct = verdicts.eq('Correct').sum()
    n_wrong = verdicts.eq('Wrong').sum()
    n_graded = n_correct + n_wrong

    # Guard against dividing by zero when nothing was graded.
    if n_graded == 0:
        return 0
    return (n_correct / n_graded) * 100

# Usage: run the full pipeline (merge -> clean -> grade -> report accuracy).
# The two input CSVs are read from, and both intermediate CSVs are written to,
# the current working directory.
source_file_path = "classified_titles.csv"  # input: supplies Date + Sentiment
destination_file_path = "predicted_values_with_dates.csv"  # input: supplies Date + True Values
merged_file_path = "merged_file_BERT.csv"  # written by load_and_merge_data
verify_file_path = "data_with_verify_BERT.csv"  # written by preprocess_data

# Step 1: add last week's actual value and the sentiment label to each row.
load_and_merge_data(source_file_path, destination_file_path, merged_file_path)
# Step 2: drop rows without a sentiment and keep one row per date.
preprocess_data(merged_file_path, verify_file_path)

# Step 3: reload the cleaned file and grade each row.
data = pd.read_csv(verify_file_path)
data = add_verify_column(data)
# NOTE(review): preprocess_data already drops rows with a missing Sentiment
# before writing this file, so this looks redundant — confirm before removing.
data = data.dropna(subset=['Sentiment'])

# Step 4: report the share of graded rows that were marked 'Correct'.
accuracy = calculate_accuracy(data)
print(f"Accuracy: {accuracy:.2f}%")

Merged file created successfully at: merged_file_BERT.csv
Data saved successfully at: data_with_verify_BERT.csv
Accuracy: 51.08%
