# Preprocessing

In [90]:
%pip install pydriller




In [91]:
from pydriller import Repository
import os
import datetime as dt
from datetime import timedelta, datetime

In [92]:
# Clone code from the repo and save it for code portability -> via normal git clone
url = "https://github.com/apache/kafka"
repo_path = os.path.join(os.getcwd(), 'kafka')
clone = f"git clone {url} {repo_path}" 

os.system(clone) # Cloning

128

In [93]:
# checkout the tag 3.6.0
os.chdir(repo_path)
os.system("git checkout 3.6.0")
# back to the "home" folder
os.chdir("..")

In [94]:
from_tag = "3.5.1"
from_date=dt.datetime(2023, 7, 14, 18, 51, 0)

to_tag = "3.6.0"
to_date=dt.datetime(2023, 9, 29, 6, 56, 0)

# Using datetimes of the releases, since tags don't yield any commits
# Open question 
# repo = Repository(path_to_repo=repo_path, from_tag=to_tag, to_tag=from_tag)
repo = Repository(path_to_repo=repo_path, since=from_date, to=to_date)

# Exercise 3

In [95]:
# Create a matrix of all current java files
import os

def get_unique_java_files(directory):
    unique_files = set()
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".java"):
                file_path = os.path.join(root, file)
                # remove the repo path from the file path
                file_path = file_path.replace(directory+"\\", "")
                unique_files.add(file_path)
    return list(unique_files)

# Replace 'your_repo_directory' with the path to your repository
unique_files = get_unique_java_files(repo_path)

In [96]:
def add_file_to_matrices(file, file_index_map, matrices):
    if file not in file_index_map:
        new_index = len(file_index_map)
        file_index_map[file] = new_index
        unique_files.append(file)  # Update the unique_files list
        
        for window in matrices:
            # Correctly expand each matrix to include the new file
            current_matrix = matrices[window]
            new_matrix_size = len(unique_files)
            new_matrix = np.zeros((new_matrix_size, new_matrix_size))
            # Copy the old matrix values into the new matrix
            new_matrix[:current_matrix.shape[0], :current_matrix.shape[1]] = current_matrix
            matrices[window] = new_matrix
        

In [100]:
def update_matrices(base_commit, matrices, file_index_map):
    commit_time = base_commit.committer_date  #time of the base commit for which we want to update the matrices

    base_modified_file_indices = []
    for file in base_commit.modified_files:
        # Accessing filename correctly 
        filename = file.new_path if file.new_path else file.old_path
        add_file_to_matrices(filename, file_index_map, matrices)
        base_modified_file_indices.append(file_index_map[filename])

    
    # get all modified files in the max time window
    for commit in Repository(path_to_repo=repo_path, 
                            since=commit_time, 
                            to=commit_time+timedelta(hours=168),
                            only_modifications_with_file_types=['.java'],
                            ).traverse_commits():

        modified_file_indices = []
        for file in commit.modified_files:
            # Accessing filename correctly 
            filename = file.new_path if file.new_path else file.old_path
            add_file_to_matrices(filename, file_index_map, matrices) #check if file is already in the matrices, if not add it (issue for files which were created and are not in repo now)
            modified_file_indices.append(file_index_map[filename])

        # increment the matrix values for each pair of files
        for i in base_modified_file_indices:
            for j in modified_file_indices:
                if i != j:  # Skip incrementing for the same file
                    for window in time_windows:
                        if commit_time - commit.committer_date <= timedelta(hours=window):
                            # if((i == 1715 and j == 2421) or (i == 2421 and j == 1715) ):
                            #     print("i: ", i, "j: ", j)
                            #     print("window: ", window)
                            #     print("commit_time: ", commit_time)
                            #     print("commit.committer_date: ", commit.committer_date)
                            #     print("old value: ", matrices[window][i, j])
                            #     print("new value: ", matrices[window][i, j] + 1)
                            matrices[window][i, j] += 1

In [98]:
# initalize matrix
import numpy as np

time_windows = [24, 48, 72, 168]  # time windows in hours
matrices = {window: np.zeros((len(unique_files), len(unique_files))) for window in time_windows}
file_index_map = {file: index for index, file in enumerate(unique_files)}


In [101]:
i = 1
all_commits = Repository(path_to_repo=repo_path, 
                         only_modifications_with_file_types=['.java'],
                         since=datetime(2023, 8, 20, 17, 59, 0)
                         ).traverse_commits()

for commit in all_commits:
    update_matrices(commit, matrices, file_index_map)
    # i -= 1
    # if i == 0:
    #     break

In [None]:
import pandas as pd

def matrices_to_dataframe(matrices, unique_files):
    df_list = []
    for window, matrix in matrices.items():
        df = pd.DataFrame(matrix, index=unique_files, columns=unique_files)
        df = df.stack().reset_index()
        df.columns = ['File1', 'File2', 'Count']
        df['Time_Window'] = window
        df_list.append(df)

    return pd.concat(df_list, ignore_index=True)

result_df = matrices_to_dataframe(matrices, unique_files)

In [None]:
matrices[24][1715,2421]

15.0

In [None]:
file_index_map["core\\src\\test\\java\\kafka\\log\\remote\\RemoteLogManagerTest.java"]

2421

In [None]:
# Example: Filter out pairs with zero counts
result_df = result_df[result_df['Count'] > 0]

# Sort, reset index, etc.
result_df = result_df.sort_values(by=['Count'], ascending=False).reset_index(drop=True)

In [None]:
# make file 1 and file2 display the whole name
pd.set_option('display.max_colwidth', None)
result_df.head(30)
# filtered_df = result_df[result_df['File1'].str.contains('RemoteLogManager.java')]
# filtered_df.head(30)

Unnamed: 0,File1,File2,Count,Time_Window
0,storage\src\test\java\org\apache\kafka\tiered\storage\TieredStorageTestHarness.java,core\src\main\java\kafka\log\remote\RemoteLogManager.java,17.0,168
1,storage\src\test\java\org\apache\kafka\tiered\storage\TieredStorageTestHarness.java,core\src\main\java\kafka\log\remote\RemoteLogManager.java,17.0,48
2,storage\src\test\java\org\apache\kafka\tiered\storage\TieredStorageTestHarness.java,core\src\main\java\kafka\log\remote\RemoteLogManager.java,17.0,24
3,storage\src\test\java\org\apache\kafka\tiered\storage\TieredStorageTestHarness.java,core\src\main\java\kafka\log\remote\RemoteLogManager.java,17.0,72
4,core\src\test\scala\integration\kafka\server\DynamicBrokerReconfigurationTest.scala,core\src\main\java\kafka\log\remote\RemoteLogManager.java,16.0,48
5,core\src\test\scala\integration\kafka\server\DynamicBrokerReconfigurationTest.scala,core\src\main\java\kafka\log\remote\RemoteLogManager.java,16.0,168
6,core\src\test\scala\integration\kafka\server\DynamicBrokerReconfigurationTest.scala,core\src\main\java\kafka\log\remote\RemoteLogManager.java,16.0,72
7,core\src\test\scala\integration\kafka\server\DynamicBrokerReconfigurationTest.scala,core\src\main\java\kafka\log\remote\RemoteLogManager.java,16.0,24
8,core\src\main\java\kafka\log\remote\RemoteLogManager.java,core\src\test\java\kafka\log\remote\RemoteLogManagerTest.java,15.0,72
9,core\src\test\java\kafka\log\remote\RemoteLogManagerTest.java,core\src\main\java\kafka\log\remote\RemoteLogManager.java,15.0,72


In [None]:
# import pandas as pd
# import re

# # Your data as a multi-line string
# data = """
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-25 05:30:49+05:30
# old value:  0.0
# new value:  1.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-25 05:30:49+05:30
# old value:  0.0
# new value:  1.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-25 05:30:49+05:30
# old value:  0.0
# new value:  1.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-25 05:30:49+05:30
# old value:  0.0
# new value:  1.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-25 05:30:49+05:30
# old value:  0.0
# new value:  1.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-25 05:30:49+05:30
# old value:  0.0
# new value:  1.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-25 05:30:49+05:30
# old value:  0.0
# new value:  1.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-25 05:30:49+05:30
# old value:  0.0
# new value:  1.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-28 10:34:26+00:00
# old value:  1.0
# new value:  2.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-28 10:34:26+00:00
# old value:  1.0
# new value:  2.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-28 10:34:26+00:00
# old value:  1.0
# new value:  2.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-08-25 05:30:49+05:30
# commit.committer_date:  2023-08-28 10:34:26+00:00
# old value:  1.0
# new value:  2.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-08-28 10:34:26+00:00
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  1.0
# new value:  2.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-08-28 10:34:26+00:00
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  1.0
# new value:  2.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-08-28 10:34:26+00:00
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  1.0
# new value:  2.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-08-28 10:34:26+00:00
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  1.0
# new value:  2.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  2.0
# new value:  3.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  2.0
# new value:  3.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  2.0
# new value:  3.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  2.0
# new value:  3.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  2.0
# new value:  3.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  2.0
# new value:  3.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  2.0
# new value:  3.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-04 09:14:29+05:30
# old value:  2.0
# new value:  3.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-05 10:28:35+05:30
# old value:  3.0
# new value:  4.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-05 10:28:35+05:30
# old value:  3.0
# new value:  4.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-05 10:28:35+05:30
# old value:  3.0
# new value:  4.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-05 10:28:35+05:30
# old value:  3.0
# new value:  4.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  3.0
# new value:  4.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  3.0
# new value:  4.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  3.0
# new value:  4.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  3.0
# new value:  4.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  4.0
# new value:  5.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  4.0
# new value:  5.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  4.0
# new value:  5.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  4.0
# new value:  5.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  4.0
# new value:  5.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  4.0
# new value:  5.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  4.0
# new value:  5.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  4.0
# new value:  5.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  5.0
# new value:  6.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  5.0
# new value:  6.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  5.0
# new value:  6.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  5.0
# new value:  6.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  6.0
# new value:  7.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  6.0
# new value:  7.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  6.0
# new value:  7.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-04 09:14:29+05:30
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  6.0
# new value:  7.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  5.0
# new value:  6.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  5.0
# new value:  6.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  5.0
# new value:  6.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  5.0
# new value:  6.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  6.0
# new value:  7.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  6.0
# new value:  7.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  6.0
# new value:  7.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  6.0
# new value:  7.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  7.0
# new value:  8.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  7.0
# new value:  8.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  7.0
# new value:  8.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-05 10:28:35+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  7.0
# new value:  8.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  8.0
# new value:  9.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  8.0
# new value:  9.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  8.0
# new value:  9.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  8.0
# new value:  9.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  7.0
# new value:  8.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  7.0
# new value:  8.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  7.0
# new value:  8.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-06 06:00:05+05:30
# old value:  7.0
# new value:  8.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  9.0
# new value:  10.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  9.0
# new value:  10.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  9.0
# new value:  10.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  9.0
# new value:  10.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  8.0
# new value:  9.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  8.0
# new value:  9.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  8.0
# new value:  9.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  8.0
# new value:  9.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  9.0
# new value:  10.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  9.0
# new value:  10.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  9.0
# new value:  10.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  9.0
# new value:  10.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  10.0
# new value:  11.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  10.0
# new value:  11.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  10.0
# new value:  11.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  10.0
# new value:  11.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  10.0
# new value:  11.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  10.0
# new value:  11.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  10.0
# new value:  11.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-06 06:00:05+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  10.0
# new value:  11.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  11.0
# new value:  12.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  11.0
# new value:  12.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  11.0
# new value:  12.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  11.0
# new value:  12.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  11.0
# new value:  12.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  11.0
# new value:  12.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  11.0
# new value:  12.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-07 14:37:22+00:00
# old value:  11.0
# new value:  12.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  12.0
# new value:  13.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  12.0
# new value:  13.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  12.0
# new value:  13.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-08 05:16:28+05:30
# old value:  12.0
# new value:  13.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  12.0
# new value:  13.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  12.0
# new value:  13.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  12.0
# new value:  13.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  12.0
# new value:  13.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  13.0
# new value:  14.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  13.0
# new value:  14.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  13.0
# new value:  14.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-07 14:37:22+00:00
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  13.0
# new value:  14.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-08 05:16:28+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  13.0
# new value:  14.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-08 05:16:28+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  13.0
# new value:  14.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-08 05:16:28+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  13.0
# new value:  14.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-08 05:16:28+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  13.0
# new value:  14.0
# i:  1715 j:  2421
# window:  24
# commit_time:  2023-09-12 10:13:44+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  14.0
# new value:  15.0
# i:  1715 j:  2421
# window:  48
# commit_time:  2023-09-12 10:13:44+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  14.0
# new value:  15.0
# i:  1715 j:  2421
# window:  72
# commit_time:  2023-09-12 10:13:44+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  14.0
# new value:  15.0
# i:  1715 j:  2421
# window:  168
# commit_time:  2023-09-12 10:13:44+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  14.0
# new value:  15.0
# i:  2421 j:  1715
# window:  24
# commit_time:  2023-09-12 10:13:44+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  14.0
# new value:  15.0
# i:  2421 j:  1715
# window:  48
# commit_time:  2023-09-12 10:13:44+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  14.0
# new value:  15.0
# i:  2421 j:  1715
# window:  72
# commit_time:  2023-09-12 10:13:44+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  14.0
# new value:  15.0
# i:  2421 j:  1715
# window:  168
# commit_time:  2023-09-12 10:13:44+05:30
# commit.committer_date:  2023-09-12 10:13:44+05:30
# old value:  14.0
# new value:  15.0

# """

# # Splitting the data into separate entries
# entries = [e for e in data.split("i:  ") if e.strip()]

# parsed_data = []
# for entry in entries:
#     parsed_entry = {}
#     lines = lines = ("i: " + entry).strip().split("\n")
#     for line in lines:
#         if line:
#             # print(line)
#             # Handling the 'i' and 'j' line
#             if line.startswith('i:'):
#                 i_value, j_value = line.replace('i:', '').replace('j:', '').split()
#                 parsed_entry['i'] = int(i_value.strip())
#                 parsed_entry['j'] = int(j_value.strip())
#             else:
#                 key, value = line.split(':', 1)  # Split only at the first colon
#                 key = key.strip()
#                 value = value.strip()
#                 if key in ['window']:
#                     parsed_entry[key] = int(value)
#                 elif key in ['old value', 'new value']:
#                     parsed_entry[key] = float(value)
#                 elif 'time' in key or 'date' in key:
#                     parsed_entry[key] = pd.to_datetime(value)
#     parsed_data.append(parsed_entry)

# # Creating a DataFrame
# df = pd.DataFrame(parsed_data)

In [None]:
# # display all rows
# pd.set_option('display.max_rows', None)
# df[df['i'] == 1715]


Unnamed: 0,i,j,window,commit_time,commit.committer_date,old value,new value
0,1715,2421,24,2023-08-25 05:30:49+05:30,2023-08-25 05:30:49+05:30,0.0,1.0
1,1715,2421,48,2023-08-25 05:30:49+05:30,2023-08-25 05:30:49+05:30,0.0,1.0
2,1715,2421,72,2023-08-25 05:30:49+05:30,2023-08-25 05:30:49+05:30,0.0,1.0
3,1715,2421,168,2023-08-25 05:30:49+05:30,2023-08-25 05:30:49+05:30,0.0,1.0
12,1715,2421,24,2023-08-28 10:34:26+00:00,2023-09-04 09:14:29+05:30,1.0,2.0
13,1715,2421,48,2023-08-28 10:34:26+00:00,2023-09-04 09:14:29+05:30,1.0,2.0
14,1715,2421,72,2023-08-28 10:34:26+00:00,2023-09-04 09:14:29+05:30,1.0,2.0
15,1715,2421,168,2023-08-28 10:34:26+00:00,2023-09-04 09:14:29+05:30,1.0,2.0
16,1715,2421,24,2023-09-04 09:14:29+05:30,2023-09-04 09:14:29+05:30,2.0,3.0
17,1715,2421,48,2023-09-04 09:14:29+05:30,2023-09-04 09:14:29+05:30,2.0,3.0
