In [16]:
import os
import pandas as pd
import dtale
import sys, os
# Resolve the parent directory to an absolute path and register it on the
# module search path exactly once. Presumably this is what lets the
# FeatureCleaning import that follows resolve; change the relative path if
# your directory layout differs.
project_root = os.path.abspath("..")
if not sys.path.count(project_root):
    sys.path.append(project_root)
    
from FeatureCleaning.CleanDSDTale import export_clean_df


def _build_project_features(df):
    """Build one feature row per project from the issue-level DataFrame.

    Args:
        df: Issue-level frame. The columns read here are 'fields.created',
            'fields.resolutiondate', 'fields.project.id',
            'fields.project.name', 'fields.issuetype.name',
            'changelog_count_status', 'changelog_count_assignee' and
            'comment_count'. The frame itself is not modified.

    Returns:
        A DataFrame with the two project key columns, the flattened
        '<column>_<statistic>' aggregates, and one '<ISSUE TYPE>_COUNT'
        column per distinct issue type.
    """
    project_keys = ['fields.project.id', 'fields.project.name']

    # Parse both timestamps as UTC. Without this, a frame that mixes tz-aware
    # and tz-naive values cannot be subtracted reliably; durations for
    # uniformly naive or uniformly aware input are unchanged.
    created = pd.to_datetime(df['fields.created'], utc=True)
    resolved = pd.to_datetime(df['fields.resolutiondate'], utc=True)

    # assign() returns a new frame, so the caller's DataFrame is left intact.
    issues = df.assign(
        time_to_resolution=(resolved - created).dt.total_seconds() / 3600  # hours
    )

    # Per-project summary statistics.
    agg_df = issues.groupby(project_keys).agg({
        'time_to_resolution': ['mean', 'median', 'min', 'max', 'sum'],
        'changelog_count_status': ['sum', 'mean'],
        'changelog_count_assignee': ['sum', 'mean'],
        'comment_count': ['sum', 'mean'],
    }).reset_index()

    # Flatten the (column, statistic) MultiIndex; the key columns carry an
    # empty statistic level after reset_index() and keep their plain name.
    agg_df.columns = [
        f"{name}_{stat}" if stat else name
        for name, stat in agg_df.columns
    ]

    # Count issues per (project, issue type) and spread the issue types into
    # columns directly, filling absent combinations with 0.
    type_counts = (
        issues.groupby(project_keys + ['fields.issuetype.name'])
        .size()
        .unstack('fields.issuetype.name', fill_value=0)
    )
    # str() keeps the rename working even if an issue-type label is not a string.
    type_counts.columns = [
        f"{str(issue_type).upper()}_COUNT" for issue_type in type_counts.columns
    ]
    type_counts = type_counts.reset_index()

    # Left merge keeps every aggregated project even when it has no counted
    # issue types (rows with a missing issue type are excluded by groupby).
    return agg_df.merge(type_counts, on=project_keys, how='left')


def main():
    """Load the cleaned issues, build project-level features, and show them.

    Prints the first rows of the result and then opens the full table in a
    D-Tale session in the browser. Returns nothing.
    """
    # 1. Load the cleaned issue-level DataFrame
    df = export_clean_df()

    # 2-5. Derive time-to-resolution, aggregate per project, and attach the
    #      per-issue-type counts
    final_df = _build_project_features(df)

    # 6. Display the final DataFrame, then open it in D-Tale for interactive
    #    exploration (read-only: cell edits are disabled)
    print(final_df.head())
    d = dtale.show(final_df, ignore_duplicate=True, allow_cell_edits=False)
    d.open_browser()

# Run the pipeline only when this code is executed directly (as a script or a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


Processing repository: Hyperledger ...
Processing repository: SecondLife ...




The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Data processed. Launching D-Tale session...



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '<DatetimeArray>
['2011-07-18 14:14:43', '2010-11-04 09:11:57', '2011-06-27 06:45:10',
 '2011-02-23 23:10:24', '2010-11-05 16:31:24', '2011-07-18 14:06:39',
 '2011-12-13 21:46:13', '2011-09-19 07:54:26', '2013-10-23 19:37:27',
 '2012-06-18 13:29:03',
 ...
 '2020-01-22 22:12:20', '2020-01-13 15:11:18', '2019-03-29 20:33:28',
 '2018-08-10 19:52:41', '2017-08-05 20:38:16', '2017-08-28 11:36:29',
 '2019-05-08 13:08:45', '2020-08-27 08:06:06', '2017-10-13 17:54:30',
 '2019-03-29 20:34:39']
Length: 271, dtype: datetime64[ns]' has dtype incompatible with datetime64[ns, UTC], please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '<DatetimeArray>
['2011-05-16 15:57:03', '2010-10-15 19:48:35', '2010-04-15 16:30:45',
 '2010-12-06 11:39:48', '2010-09-30 16:33:17', '2011-06-20 21:55:54',
 '2011-11

✅ D-Tale session launched successfully.
   fields.project.id  fields.project.name  time_to_resolution_mean  \
0            10001.0             Sawtooth              4558.999199   
1            10002.0               Fabric              2637.846086   
2            10100.0  Blockchain Explorer              1044.239444   
3            10200.0                Cello              1387.685370   
4            10244.0            Snowstorm              2309.676722   

   time_to_resolution_median  time_to_resolution_min  time_to_resolution_max  \
0                 430.898333               70.361944            33506.378889   
1                 378.408889                0.002500            24148.719167   
2                 344.660556                0.793611             4079.931389   
3                  50.823333               19.088611             4093.144167   
4                 863.968611               66.277778            10819.966111   

   time_to_resolution_sum  changelog_count_status_sum  \
0