In [1]:
# Import the modules used by the cells in this notebook and report which
# ipywidgets version is installed.
import io
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
import asyncio
# Print the ipywidgets version for reference. NOTE(review): the layout of
# FileUpload.value appears to differ between ipywidgets major versions (later
# code handles both a dict and a tuple), so the version matters when debugging.
print(widgets.__version__)

8.1.7


In [7]:
import io
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

# ***File Upload Section***
# Output area that receives the preview (or error messages) for the uploaded file.
file_output = widgets.Output()

# Single-file picker restricted to CSV and Excel (.xlsx) files.
file_upload = widgets.FileUpload(
    description='Select File',
    multiple=False,
    accept='.csv, .xlsx',
)

def handle_file_upload(change):
    """Observer callback: preview the file(s) currently held by ``file_upload``.

    Each uploaded file is read into a pandas DataFrame (CSV first, Excel as a
    fallback, first row treated as the header). The file name, its size and
    the first five rows are rendered in ``file_output``. If neither reader
    succeeds, both error messages are printed in ``file_output`` and
    processing stops.

    Args:
        change: Trait-change notification supplied by ``observe``. It is not
            inspected; the current ``file_upload.value`` is read instead.

    Returns:
        None. Everything is rendered into ``file_output``.
    """
    uploads = file_upload.value
    if not uploads:
        return

    def read_field(record, key, default):
        # Per-file records are dict-like, but fall back to attribute access
        # in case a plain object is supplied.
        if isinstance(record, dict):
            return record.get(key, default)
        return getattr(record, key, default)

    # ipywidgets 7 exposes ``value`` as {filename: {'content': ..., ...}},
    # whereas ipywidgets 8 exposes a tuple of per-file records that carry
    # their own 'name'. Calling ``.items()`` on the tuple raises
    # AttributeError, so normalise both layouts to (name, record) pairs.
    if isinstance(uploads, dict):
        entries = list(uploads.items())
    else:
        entries = [
            (read_field(record, 'name', 'Unknown file'), record)
            for record in uploads
        ]

    # Clear the output area before processing
    with file_output:
        file_output.clear_output()

    # Process each uploaded file (usually just one)
    for fname, file_info in entries:
        file_content = read_field(file_info, 'content', b'')
        file_stream = io.BytesIO(file_content)
        try:
            # First, try reading as CSV (assumes headers in the first row)
            df = pd.read_csv(file_stream, header=0)
        except Exception as e_csv:
            # Rewind: the failed CSV attempt may have consumed part of the stream
            file_stream.seek(0)
            try:
                # If CSV fails, try reading as Excel
                df = pd.read_excel(file_stream, header=0)
            except Exception as e_excel:
                with file_output:
                    file_output.clear_output()
                    print("Error reading file as CSV or Excel.")
                    print("CSV Error:", e_csv)
                    print("Excel Error:", e_excel)
                return
        with file_output:
            file_output.clear_output()
            print(f"Loaded file: {fname}, size: {len(file_content)} bytes")
            print("First five rows:")
            display(df.head())

# Container stacking the prompt label, the upload button and the preview area
upload_box = widgets.VBox(
    children=[
        widgets.Label(value="Upload your CSV or Excel file:"),
        file_upload,
        file_output,
    ]
)

# Run the preview callback whenever the upload widget's 'value' trait changes
file_upload.observe(handle_file_upload, names='value')

# ***Final Interface Display***
display(upload_box)


VBox(children=(Label(value='Upload your CSV or Excel file:'), FileUpload(value=(), accept='.csv, .xlsx', descr…

In [8]:
# Optional debugging aid: if the upload misbehaves, inspect the raw value currently held by the widget.
print("File Upload Value:", file_upload.value, sep="\n")

File Upload Value:
({'name': 'search_1.xlsx', 'type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'size': 36765, 'content': <memory at 0x000001FD392FAF80>, 'last_modified': datetime.datetime(2025, 3, 27, 10, 19, 57, 413000, tzinfo=datetime.timezone.utc)},)


In [9]:
def transform_to_dataframe():
    """
    Build a pandas DataFrame from the data input method selected in ``user_choice``.

    For 'Import a file directly', the first uploaded file in ``file_upload`` is
    read as CSV and, if that fails, as Excel. For 'Copy and paste data', the
    text in ``paste_text`` is parsed as delimited text. In both cases the first
    row is treated as the header, and a confirmation is displayed that includes:
      - the file name or a label (for pasted data)
      - the header (list of column names)
      - the first five rows as an exemplar

    Returns:
        The loaded pandas DataFrame, or None when nothing could be loaded (an
        explanatory message is printed in that case). Assign the result to a
        variable, or the notebook will echo the DataFrame when the call is the
        last expression in a cell.
    """
    # File import path
    if user_choice.value == 'Import a file directly':
        if not file_upload.value:
            print("No file has been uploaded.")
            return None
        try:
            # The widget value is a dict keyed by file name in ipywidgets 7
            # and a tuple of per-file records in ipywidgets 8; accept both.
            if isinstance(file_upload.value, dict):
                uploaded_file = next(iter(file_upload.value.values()))
            elif isinstance(file_upload.value, tuple):
                uploaded_file = file_upload.value[0]
            else:
                print("Unexpected file upload value type:", type(file_upload.value))
                return None

            # Get the file content and file name (if available)
            if isinstance(uploaded_file, dict):
                fname = uploaded_file.get('name', 'Unknown file')
                file_content = uploaded_file.get('content', b'')
            else:
                fname = getattr(uploaded_file, 'name', 'Unknown file')
                file_content = getattr(uploaded_file, 'content', b'')

            # Wrap content in a BytesIO stream (simulate a file object)
            file_stream = io.BytesIO(file_content)
            try:
                df = pd.read_csv(file_stream, header=0)  # Assume first row is header
            except Exception as e_csv:
                # If CSV reading fails, rewind and try Excel instead
                file_stream.seek(0)
                try:
                    df = pd.read_excel(file_stream, header=0)
                except Exception as e_excel:
                    print("Error reading file as CSV or Excel.")
                    print("CSV Error:", e_csv)
                    print("Excel Error:", e_excel)
                    return None

            # Confirmation message along with header and first five rows of the DataFrame
            print(f"Headed DataFrame loaded from file '{fname}':")
            print("DataFrame columns (header):", list(df.columns))
            print("First five rows:")
            display(df.head())
            return df

        except Exception as e:
            print("Error processing file upload:", e)
            return None

    # Pasted data path
    elif user_choice.value == 'Copy and paste data':
        raw_text = paste_text.value.strip()
        if not raw_text:
            print("No data pasted!")
            return None
        try:
            # Pick the delimiter from the header line only, preferring tab:
            # data copied from a spreadsheet is tab-separated and its cells
            # often contain commas, which must not be mistaken for separators.
            header_line = raw_text.splitlines()[0]
            delimiter = '\t' if '\t' in header_line else ','
            df = pd.read_csv(io.StringIO(raw_text), sep=delimiter, header=0)
            print("Headed DataFrame loaded from pasted data:")
            print("DataFrame columns (header):", list(df.columns))
            print("First five rows:")
            display(df.head())
            return df

        except Exception as e:
            print("Error creating DataFrame:", e)
            return None

    # Neither known input method is selected
    print("Unrecognised data input method:", user_choice.value)
    return None

# Run the transformation function to display the exemplar. The result is
# assigned so the notebook does not also echo the returned DataFrame.
loaded_df = transform_to_dataframe()


Headed DataFrame loaded from file 'search_1.xlsx':
DataFrame columns (header): ['Publication Type', 'Authors', 'Book Authors', 'Book Editors', 'Book Group Authors', 'Author Full Names', 'Book Author Full Names', 'Group Authors', 'Article Title', 'Source Title', 'Book Series Title', 'Book Series Subtitle', 'Language', 'Document Type', 'Conference Title', 'Conference Date', 'Conference Location', 'Conference Sponsor', 'Conference Host', 'Author Keywords', 'Keywords Plus', 'Abstract', 'Addresses', 'Affiliations', 'Reprint Addresses', 'Email Addresses', 'Researcher Ids', 'ORCIDs', 'Funding Orgs', 'Funding Name Preferred', 'Funding Text', 'Cited References', 'Cited Reference Count', 'Times Cited, WoS Core', 'Times Cited, All Databases', '180 Day Usage Count', 'Since 2013 Usage Count', 'Publisher', 'Publisher City', 'Publisher Address', 'ISSN', 'eISSN', 'ISBN', 'Journal Abbreviation', 'Journal ISO Abbreviation', 'Publication Date', 'Publication Year', 'Volume', 'Issue', 'Part Number', 'Suppl

Unnamed: 0,Publication Type,Authors,Book Authors,Book Editors,Book Group Authors,Author Full Names,Book Author Full Names,Group Authors,Article Title,Source Title,...,Web of Science Index,Research Areas,IDS Number,Pubmed Id,Open Access Designations,Highly Cited Status,Hot Paper Status,Date of Export,UT (Unique WOS ID),Web of Science Record
0,J,"Kinyota, M",,,,"Kinyota, Mjege",,,Implementing ecopedagogy in highly centralised...,INTERNATIONAL STUDIES IN SOCIOLOGY OF EDUCATION,...,Emerging Sources Citation Index (ESCI),Education & Educational Research; Sociology,QS7FK,,,,,2025-03-27,WOS:000592627300001,View Full Record in Web of Science
1,J,"Misiaszek, GW; Rodrigues, C",,,,"William Misiaszek, Greg; Rodrigues, Cae",,,Teaching just-based environmental sustainabili...,TEACHING IN HIGHER EDUCATION,...,Social Science Citation Index (SSCI),Education & Educational Research,L9HV0,,Bronze,,,2025-03-27,WOS:001026310600001,View Full Record in Web of Science
2,J,"Misiaszek, GW",,,,"Misiaszek, Greg William",,,Ecopedagogy: teaching critical literacies of '...,TEACHING IN HIGHER EDUCATION,...,Social Science Citation Index (SSCI),Education & Educational Research,ME2HE,,,,,2025-03-27,WOS:000544479600005,View Full Record in Web of Science
3,J,"Misiaszek, GW",,,,"Misiaszek, Greg William",,,Beyond-Humans reinventions for criticality: po...,TEACHING IN HIGHER EDUCATION,...,Social Science Citation Index (SSCI),Education & Educational Research,Y5R9I,,,,,2025-03-27,WOS:001432703000001,View Full Record in Web of Science
4,J,"Alhazmi, A; Almashhour, RA",,,,"Alhazmi, Ahmed; Almashhour, Rasha Ahmed",,,Eco-pedagogy in chemistry education: challengi...,INTERNATIONAL JOURNAL OF SUSTAINABILITY IN HIG...,...,Social Science Citation Index (SSCI),Science & Technology - Other Topics; Education...,T8Y0F,,,,,2025-03-27,WOS:001407781300001,View Full Record in Web of Science


In [None]:
'''
After running transform_to_dataframe(), you can keep its result under any variable name you like.
For example, to save the DataFrame as 'new_df', create a new cell and type:
new_df = transform_to_dataframe()
This assigns the value returned by the function to new_df, so you can reference and manipulate it by that name.
new_df will be None if no DataFrame was returned (for example, when no data could be loaded), so check it before use.
'''