In [1]:
!pip install ipywidgets pandas
!jupyter nbextension enable --py widgetsnbextension

usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

options:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as machine-readable json
  --debug        output debug information about paths

Available subcommands: contrib dejavu events execute kernel kernelspec lab
labextension labhub migrate nbconvert nbextensions_configurator notebook run
server troubleshoot trust

Jupyter command `jupyter-nbextension` not found.


In [9]:
from ipywidgets import FileUpload, Button, Output, VBox, HBox, Tab
import io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

# Input widgets: one uploader per file to compare, a button that triggers the
# comparison, and an Output area that collects everything the comparison prints.
upload1, upload2 = (
    FileUpload(description=label)
    for label in ("Upload File 1", "Upload File 2")
)
compare_btn = Button(description="Compare Files")
result_display = Output()

def _first_uploaded_file(upload):
    """Return the first file record held by a FileUpload widget, or None if empty.

    ``upload.value`` is handled in both shapes: a dict keyed by filename
    (ipywidgets 7) and a sequence of per-file dicts (ipywidgets 8, as shown by
    the ``FileUpload(value=())`` repr in this notebook's output).
    """
    uploaded = upload.value
    records = uploaded.values() if isinstance(uploaded, dict) else uploaded
    return next(iter(records), None)


def compare_files(b):
    """Button callback: load both uploaded CSVs and report their differences.

    Everything is rendered inside ``result_display``, which is cleared first so
    repeated clicks do not stack reports. The report covers the row-count
    difference, per-column null counts, columns present in only one file, and
    the sections produced by ``advanced_comparison``. An "Export Results"
    button is shown at the end.

    Args:
        b: The clicked button (unused; required by the ``on_click`` signature).
    """
    with result_display:
        result_display.clear_output()

        # Get file data
        file1 = _first_uploaded_file(upload1)
        file2 = _first_uploaded_file(upload2)
        if file1 is None or file2 is None:
            print("Please upload both files before comparing.")
            return

        # Convert to DataFrames; BytesIO accepts bytes as well as memoryview content
        try:
            df1 = pd.read_csv(io.BytesIO(file1['content']))
            df2 = pd.read_csv(io.BytesIO(file2['content']))
        except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
            print(f"Could not read the uploaded files as CSV: {exc}")
            return

        # Perform comparisons
        print(f"Row count difference: {abs(len(df1) - len(df2))}")
        print("Null values comparison:")
        print(pd.concat(
            [df1.isnull().sum(), df2.isnull().sum()],
            axis=1,
            keys=['File 1', 'File 2'],  # label the columns instead of 0 / 1
        ))

        # Column differences
        cols1 = set(df1.columns)
        cols2 = set(df2.columns)
        print(f"Missing columns in File 2: {cols1 - cols2}")
        print(f"Missing columns in File 1: {cols2 - cols1}")

        # Add advanced comparison
        advanced_comparison(df1, df2)

        def _export(_button):
            # Re-enter the output area so the confirmation message lands next
            # to the report; output printed from a widget callback is otherwise
            # not guaranteed to be shown by every front end.
            with result_display:
                export_results(df1, df2)

        # Export comparison results
        export_btn = Button(description="Export Results")
        export_btn.on_click(_export)
        display(export_btn)

def advanced_comparison(df1, df2, corr_threshold=0.1):
    """Print and display deeper differences between two DataFrames.

    Sections, in order: columns whose dtype differs, min/max of columns that
    are numeric in both frames, columns whose distinct-value count differs,
    pairwise correlations that changed by more than ``corr_threshold``, and a
    stacked preview of the first five rows of each frame.

    Args:
        df1: DataFrame loaded from file 1.
        df2: DataFrame loaded from file 2.
        corr_threshold: Minimum absolute change in a pairwise correlation for
            it to be shown. Defaults to 0.1.
    """
    shared_cols = [col for col in df1.columns if col in df2.columns]
    # Only numeric columns can be ranged/correlated; selecting them up front
    # keeps DataFrame.corr() from raising on string columns.
    numeric1 = df1.select_dtypes(include='number')
    numeric2 = df2.select_dtypes(include='number')
    shared_numeric = [col for col in numeric1.columns if col in numeric2.columns]

    # 1. Data type comparison
    print("\nData Type Comparison:")
    dtypes_diff = pd.concat([df1.dtypes, df2.dtypes], axis=1)
    dtypes_diff.columns = ['File 1', 'File 2']
    display(dtypes_diff[dtypes_diff['File 1'] != dtypes_diff['File 2']])

    # 2. Value range analysis (columns numeric in BOTH files, any int/float width)
    print("\nValue Range Analysis:")
    for col in shared_numeric:
        print(f"{col}:")
        print(f"  File 1: Min={numeric1[col].min()}, Max={numeric1[col].max()}")
        print(f"  File 2: Min={numeric2[col].min()}, Max={numeric2[col].max()}")

    # 3. Unique value counts
    print("\nUnique Value Counts Difference:")
    for col in shared_cols:
        diff = df1[col].nunique() - df2[col].nunique()
        if diff != 0:
            print(f"{col}: {diff}")

    # 4. Correlation difference
    print("\nCorrelation Difference:")
    if shared_numeric:
        corr_diff = numeric1[shared_numeric].corr() - numeric2[shared_numeric].corr()
        # Mask entries below the threshold, then drop rows/columns left empty.
        notable = corr_diff[corr_diff.abs() > corr_threshold]
        display(notable.dropna(how='all').dropna(axis=1, how='all'))
    else:
        print("No shared numeric columns to correlate.")

    # 5. Sample data preview
    print("\nSample Data Preview (First 5 rows):")
    display(HTML(pd.concat([df1.head(), df2.head()], keys=['File 1', 'File 2']).to_html()))

def export_results(df1, df2, path='comparison_results.txt'):
    """Write a plain-text summary of the comparison to ``path``.

    The report contains the row-count difference, per-column null counts for
    both files, the columns present in only one file, and the columns whose
    dtype differs. A confirmation line is printed once the file is written.

    Args:
        df1: DataFrame loaded from file 1.
        df2: DataFrame loaded from file 2.
        path: Destination file; overwritten if it exists. Defaults to
            'comparison_results.txt' in the current working directory.
    """
    null_counts = pd.concat(
        [df1.isnull().sum(), df2.isnull().sum()],
        axis=1,
        keys=['File 1', 'File 2'],  # label the columns instead of 0 / 1
    )

    dtypes_diff = pd.concat([df1.dtypes, df2.dtypes], axis=1)
    dtypes_diff.columns = ['File 1', 'File 2']
    dtype_mismatches = dtypes_diff[dtypes_diff['File 1'] != dtypes_diff['File 2']]

    report = "".join([
        "Comparison Results\n\n",
        f"Row count difference: {abs(len(df1) - len(df2))}\n\n",
        "Null values comparison:\n",
        null_counts.to_string(),
        "\n\nColumn differences:\n",
        f"Missing columns in File 2: {set(df1.columns) - set(df2.columns)}\n",
        f"Missing columns in File 1: {set(df2.columns) - set(df1.columns)}\n",
        # Add advanced comparison results
        "\nData Type Comparison:\n",
        dtype_mismatches.to_string(),
        "\n",
    ])

    # Save to file; explicit UTF-8 so non-ASCII column names are written the
    # same way regardless of the platform's default encoding.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(report)

    print(f"Results exported to '{path}'")

# Wire the compare button to its handler, then stack the widgets:
# uploaders side by side on top, the button below them, results at the bottom.
compare_btn.on_click(compare_files)
ui = VBox(children=[
    HBox(children=[upload1, upload2]),
    compare_btn,
    result_display,
])

# Render the assembled UI a single time
display(ui)


VBox(children=(HBox(children=(FileUpload(value=(), description='Upload File 1'), FileUpload(value=(), descript…