# TAM Analysis Notebook
This notebook analyzes Technology Acceptance Model (TAM) scores for students and parents in Ireland and India, using data from multiple Excel sheets. Visualizations are created using Plotly Express.

In [14]:
import pandas as pd
import numpy as np
import plotly.express as px
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import warnings
import difflib
warnings.filterwarnings('ignore')

## Load and Prepare Data
Load datasets from Excel file and add country and group labels.

In [15]:
# Load datasets
students_ireland = pd.read_excel('Data.xlsx', sheet_name='Students_Ireland')
parents_ireland = pd.read_excel('Data.xlsx', sheet_name='Parents_Ireland')
students_india = pd.read_excel('Data.xlsx', sheet_name='Students_India')
parents_india = pd.read_excel('Data.xlsx', sheet_name='Parents_India')

# Add country and group labels
students_ireland['Country'] = 'Ireland'
students_ireland['Group'] = 'Student'
parents_ireland['Country'] = 'Ireland'
parents_ireland['Group'] = 'Parent'
students_india['Country'] = 'India'
students_india['Group'] = 'Student'
parents_india['Country'] = 'India'
parents_india['Group'] = 'Parent'

## Column Matching Utility
Helper code to find matching column names in datasets.

In [16]:
# Find the closest matching column name
def find_column(df, search):
    for col in df.columns:
        if search.strip() in col.strip():
            return col
    matches = difflib.get_close_matches(search.strip(), df.columns, n=1, cutoff=0.6)
    if matches:
        return matches[0]
    raise KeyError(f"Column '{search}' not found in DataFrame. Available columns: {list(df.columns)}")

## Calculate Student TAM Scores
Calculate TAM scores for students in Ireland and India.

In [17]:
# Mapping for India student data
confidence_map = {
    'Not at all confident': 1, 
    'A little confident': 2, 
    'Quite a bit confident': 3, 
    'Really confident': 4
}
interest_map = {
    'Not at all interested': 1, 
    'A little interested': 2, 
    'Quite a bit interested': 3, 
    'Really interested': 4
}

# Find columns for India students
coding_conf_col = find_column(students_india, 'How confident are you in your coding skills')
computer_conf_col = find_column(students_india, 'How confident are you in your computer skills')
interest_stem_col = find_column(students_india, 'How interested are you in science and technology')
interest_maths_col = find_column(students_india, 'How interested are you in maths')
gender_col_india = find_column(students_india, 'Are you a boy or a girl')

# Map India student responses
students_india['coding_confidence_num'] = students_india[coding_conf_col].map(confidence_map)
students_india['computer_confidence_num'] = students_india[computer_conf_col].map(confidence_map)
students_india['interest_stem_num'] = students_india[interest_stem_col].map(interest_map)
students_india['interest_maths_num'] = students_india[interest_maths_col].map(interest_map)

# Calculate TAM scores for India students
students_india['PU_Score'] = students_india[['interest_stem_num', 'interest_maths_num']].mean(axis=1)
students_india['PEOU_Score'] = students_india[['coding_confidence_num', 'computer_confidence_num']].mean(axis=1)
students_india['Attitude_Score'] = students_india[['interest_stem_num', 'interest_maths_num']].mean(axis=1)

# Map gender
gender_map = {'Boy': 1, 'Girl': 2, 'Neither': 3}
students_india['Gender_Num'] = students_india[gender_col_india].map(gender_map)

# Find columns for Ireland students
interest_stem_col_ie = find_column(students_ireland, 'How interested are you in science and technology')
interest_maths_col_ie = find_column(students_ireland, 'How interested are you in maths')
computer_conf_col_ie = find_column(students_ireland, 'How confident are you in your computer skills')
coding_conf_col_ie = find_column(students_ireland, 'How confident are you in your coding skills')
gender_col_ie = find_column(students_ireland, 'Are you a boy or a girl')

# Calculate TAM scores for Ireland students
students_ireland['PU_Score'] = students_ireland[[interest_stem_col_ie, interest_maths_col_ie]].mean(axis=1)
students_ireland['PEOU_Score'] = students_ireland[[computer_conf_col_ie, coding_conf_col_ie]].mean(axis=1)
students_ireland['Attitude_Score'] = students_ireland[[interest_stem_col_ie, interest_maths_col_ie]].mean(axis=1)

# Map gender for Ireland
students_ireland['Gender_Num'] = students_ireland[gender_col_ie].map(gender_map)

## Calculate Parent TAM Scores
Calculate TAM scores for parents in Ireland and India.

In [18]:
# Ireland Parents TAM
parents_ireland['PU_Score'] = parents_ireland['My child finds technology'].fillna(4)
parents_ireland['PEOU_Score'] = parents_ireland['My child finds coding'].fillna(4)

# India Parents TAM mapping
india_tech_map = {
    'Very Boring': 1, 'Somewhat Boring': 2, 'Neither boring Nor Interesting': 3,
    'Somewhat Interesting': 4, 'Interesting': 5, 'Very Interesting': 6
}

# Find columns for India parents
tech_col_india = find_column(parents_india, 'My child finds technology')
coding_col_india = find_column(parents_india, 'My child finds coding')

# Map India parent responses
parents_india['tech_interest_num'] = parents_india[tech_col_india].map(india_tech_map)
parents_india['coding_interest_num'] = parents_india[coding_col_india].map(india_tech_map)

# Calculate TAM scores for India parents
parents_india['PU_Score'] = parents_india['tech_interest_num'].fillna(4)
parents_india['PEOU_Score'] = parents_india['coding_interest_num'].fillna(4)

## Combine Datasets
Combine student and parent datasets for analysis.

In [19]:
students_combined = pd.concat([
    students_ireland[['PU_Score', 'PEOU_Score', 'Attitude_Score', 'Country', 'Group', 'Gender_Num']],
    students_india[['PU_Score', 'PEOU_Score', 'Attitude_Score', 'Country', 'Group', 'Gender_Num']]
], ignore_index=True)

parents_combined = pd.concat([
    parents_ireland[['PU_Score', 'PEOU_Score', 'Country', 'Group']],
    parents_india[['PU_Score', 'PEOU_Score', 'Country', 'Group']]
], ignore_index=True)

## TAM Comparison Chart
Visualize average TAM scores for students and parents by country.

In [20]:
# Students comparison
student_means = students_combined.groupby('Country')[['PU_Score', 'PEOU_Score', 'Attitude_Score']].mean().reset_index()
student_melted = pd.melt(student_means, id_vars=['Country'], value_vars=['PU_Score', 'PEOU_Score', 'Attitude_Score'], 
                         var_name='TAM_Dimension', value_name='Score')
fig = px.bar(student_melted, x='Country', y='Score', color='TAM_Dimension', barmode='group',
             title='Student TAM Scores by Country', labels={'Score': 'Average Score'})
fig.show()

# Parents comparison
parent_means = parents_combined.groupby('Country')[['PU_Score', 'PEOU_Score']].mean().reset_index()
parent_melted = pd.melt(parent_means, id_vars=['Country'], value_vars=['PU_Score', 'PEOU_Score'],
                        var_name='TAM_Dimension', value_name='Score')
fig = px.bar(parent_melted, x='Country', y='Score', color='TAM_Dimension', barmode='group',
             title='Parent TAM Scores by Country', labels={'Score': 'Average Score'})
fig.show()

## Gender Stereotype Heatmap
Visualize parent perceptions of gender abilities in technology.

In [21]:
# Use exact column names for gender stereotype analysis based on diagnostic output
ireland_girls_col = 'I think that girls usually do well in technology. Please rate how much you agree with this statement on a scale from 0 to 100 by writing a number in the box, where 0 is not true at all and 100 is very much true'
ireland_boys_col = 'I think that boys usually do well in technology. Please rate how much you agree with this statement on a scale from 0 to 100 by writing a number in the box, where 0 is not true at all and 100 is very much true'
india_girls_col = 'I think that girls usually do well in technology. Please rate how much you agree with this statement on a scale from 0 to 100 by writing a number in the box, where 0 is not true at all and 100 is very'
india_boys_col = 'I think that boys usually do well in technology. Please rate how much you agree with this statement on a scale from 0 to 100 by writing a number in the box, where 0 is not true at all and 100 is very '

# Calculate means directly (values are already numeric 0-100)
ireland_girls = parents_ireland[ireland_girls_col].mean()
ireland_boys = parents_ireland[ireland_boys_col].mean()
india_girls = parents_india[india_girls_col].mean()
india_boys = parents_india[india_boys_col].mean()

gender_data = pd.DataFrame({
    'Country': ['Ireland', 'India'],
    'Girls_Technology_Ability': [ireland_girls, india_girls],
    'Boys_Technology_Ability': [ireland_boys, india_boys]
})

## Confidence vs Career Interest Scatter
Visualize relationship between coding confidence and STEM interest.

In [22]:
# Create scatter plot
fig = px.scatter(students_combined, x='PEOU_Score', y='PU_Score', color='Country', size_max=10,
                 title='Technology Confidence vs STEM Interest by Country',
                 labels={'PEOU_Score': 'Perceived Ease of Use (Coding Confidence)', 'PU_Score': 'Perceived Usefulness (STEM Interest)'})

# Add trend line
x = students_combined['PEOU_Score'].dropna()
y = students_combined['PU_Score'].dropna()
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
trend_data = pd.DataFrame({'x': x, 'y': p(x)})
fig.add_scatter(x=trend_data['x'], y=trend_data['y'], mode='lines', name='Trend Line', line=dict(color='red', dash='dash'))

# Add correlation annotation
correlation = stats.pearsonr(x, y)[0]
fig.add_annotation(x=0.05, y=0.95, xref='paper', yref='paper', showarrow=False,
                   text=f'Overall Correlation: {correlation:.3f}', bgcolor='wheat')
fig.show()

## Correlation Matrix
Visualize correlations between TAM dimensions.

In [23]:
# Calculate correlation
corr_data = students_combined[['PU_Score', 'PEOU_Score', 'Attitude_Score']].corr().reset_index().melt(id_vars='index', var_name='Variable2', value_name='Correlation')

# Create heatmap
fig = px.density_heatmap(corr_data, x='index', y='Variable2', z='Correlation', text_auto='.3f',
                         title='TAM Dimensions Correlation Matrix (All Students)', color_continuous_scale='RdBu',
                         labels={'Correlation': 'Correlation Coefficient', 'index': 'Variable1'})
fig.show()

## Gender Differences Analysis
Visualize TAM scores by gender and country.

In [24]:
# Filter out missing gender data for student box plots
student_gender_data = students_combined.dropna(subset=['Gender_Num'])

# Create box plots
for dim, title in zip(['PU_Score', 'PEOU_Score', 'Attitude_Score'], ['Perceived Usefulness', 'Perceived Ease of Use', 'Attitude']):
    fig = px.box(student_gender_data, x='Gender_Num', y=dim, color='Country',
                 title=f'{title} by Gender and Country',
                 labels={'Gender_Num': 'Gender (1=Boy, 2=Girl)', dim: 'TAM Score'})
    fig.show()

## Clustering Analysis
Perform K-means clustering on TAM scores and visualize in 3D.

In [25]:
# Prepare data for clustering
cluster_data = students_combined[['PU_Score', 'PEOU_Score', 'Attitude_Score']].dropna()

# Standardize data
scaler = StandardScaler()
scaled_data = scaler.fit_transform(cluster_data)

# Perform K-means clustering
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(scaled_data)

# Add cluster labels
cluster_data_plot = cluster_data.copy()
cluster_data_plot['Cluster'] = clusters

# Create 3D scatter plot
fig = px.scatter_3d(cluster_data_plot, x='PU_Score', y='PEOU_Score', z='Attitude_Score', color='Cluster',
                    title='Student Clusters Based on TAM Scores',
                    labels={'PU_Score': 'Perceived Usefulness', 'PEOU_Score': 'Perceived Ease of Use', 'Attitude_Score': 'Attitude Score'})
fig.show()

## Summary Statistics
Generate comprehensive summary statistics.

In [26]:
# Ensure gender_data is not overwritten by student gender analysis
if 'Girls_Technology_Ability' not in gender_data.columns or 'Boys_Technology_Ability' not in gender_data.columns:
    # Recreate gender_data from parent perceptions if needed
    ireland_girls_col = 'I think that girls usually do well in technology. Please rate how much you agree with this statement on a scale from 0 to 100 by writing a number in the box, where 0 is not true at all and 100 is very much true'
    ireland_boys_col = 'I think that boys usually do well in technology. Please rate how much you agree with this statement on a scale from 0 to 100 by writing a number in the box, where 0 is not true at all and 100 is very much true'
    india_girls_col = 'I think that girls usually do well in technology. Please rate how much you agree with this statement on a scale from 0 to 100 by writing a number in the box, where 0 is not true at all and 100 is very'
    india_boys_col = 'I think that boys usually do well in technology. Please rate how much you agree with this statement on a scale from 0 to 100 by writing a number in the box, where 0 is not true at all and 100 is very '
    ireland_girls = parents_ireland[ireland_girls_col].mean()
    ireland_boys = parents_ireland[ireland_boys_col].mean()
    india_girls = parents_india[india_girls_col].mean()
    india_boys = parents_india[india_boys_col].mean()
    gender_data = pd.DataFrame({
        'Country': ['Ireland', 'India'],
        'Girls_Technology_Ability': [ireland_girls, india_girls],
        'Boys_Technology_Ability': [ireland_boys, india_boys]
    }).set_index('Country')

print('=== TAM ANALYSIS SUMMARY REPORT ===\n')

# Sample sizes
print('1. SAMPLE SIZES:')
print(f'   Students Ireland: {len(students_combined[students_combined["Country"]=="Ireland"])}')
print(f'   Students India: {len(students_combined[students_combined["Country"]=="India"])}')
print(f'   Parents Ireland: {len(parents_combined[parents_combined["Country"]=="Ireland"])}')
print(f'   Parents India: {len(parents_combined[parents_combined["Country"]=="India"])}\n')

# TAM scores by country
print('2. AVERAGE TAM SCORES BY COUNTRY (Students):')
student_summary = students_combined.groupby('Country')[['PU_Score', 'PEOU_Score', 'Attitude_Score']].agg(['mean', 'std'])
print(student_summary.round(3))
print()

# Gender stereotype analysis
print('3. GENDER STEREOTYPES IN TECHNOLOGY (Parents):')
print('   Average agreement scores (0-100 scale):')
print('   Available columns in gender_data:', list(gender_data.columns))

for country in ['Ireland', 'India']:
    girls_col = 'Girls_Technology_Ability' if 'Girls_Technology_Ability' in gender_data.columns else None
    boys_col = 'Boys_Technology_Ability' if 'Boys_Technology_Ability' in gender_data.columns else None
    girls_score = gender_data.loc[country, girls_col] if girls_col and country in gender_data.index else None
    boys_score = gender_data.loc[country, boys_col] if boys_col and country in gender_data.index else None
    print(f'   {country} - Girls capable: {girls_score:.1f}' if girls_score is not None else f'   {country} - Girls capable: N/A')
    print(f'   {country} - Boys capable: {boys_score:.1f}' if boys_score is not None else f'   {country} - Boys capable: N/A')

# Calculate bias
ireland_bias = (gender_data.loc['Ireland', boys_col] - gender_data.loc['Ireland', girls_col]) if boys_col and girls_col and 'Ireland' in gender_data.index else None
india_bias = (gender_data.loc['India', boys_col] - gender_data.loc['India', girls_col]) if boys_col and girls_col and 'India' in gender_data.index else None

print(f'\n   Gender bias (Boys - Girls scores):')
print(f'   Ireland: {ireland_bias:.1f} points' if ireland_bias is not None else '   Ireland: N/A')
print(f'   India: {india_bias:.1f} points\n' if india_bias is not None else '   India: N/A\n')

# Key insights
print('4. KEY INSIGHTS:')
ireland_students = students_combined[students_combined['Country']=='Ireland']
india_students = students_combined[students_combined['Country']=='India']

ireland_pu = ireland_students['PU_Score'].mean()
india_pu = india_students['PU_Score'].mean()
ireland_peou = ireland_students['PEOU_Score'].mean()
india_peou = india_students['PEOU_Score'].mean()

print(f'   • {"India" if india_pu > ireland_pu else "Ireland"} students show higher STEM interest')
print(f'   • {"India" if india_peou > ireland_peou else "Ireland"} students show higher coding confidence')
if ireland_bias is not None and india_bias is not None:
    print(f'   • {"Ireland" if ireland_bias < india_bias else "India"} shows less gender bias in technology')
    if abs(ireland_bias) > 10 or abs(india_bias) > 10:
        print(f'   • Significant gender bias detected (>10 points difference)')

correlation = stats.pearsonr(students_combined['PEOU_Score'].dropna(), students_combined['PU_Score'].dropna())[0]
print(f'   • Overall correlation between confidence and interest: {correlation:.3f}')

=== TAM ANALYSIS SUMMARY REPORT ===

1. SAMPLE SIZES:
   Students Ireland: 34
   Students India: 45
   Parents Ireland: 33
   Parents India: 44

2. AVERAGE TAM SCORES BY COUNTRY (Students):
        PU_Score        PEOU_Score        Attitude_Score       
            mean    std       mean    std           mean    std
Country                                                        
India      2.933  0.837      2.278  0.743          2.933  0.837
Ireland    2.103  0.860      2.971  0.674          2.103  0.860

3. GENDER STEREOTYPES IN TECHNOLOGY (Parents):
   Average agreement scores (0-100 scale):
   Available columns in gender_data: ['Country', 'Girls_Technology_Ability', 'Boys_Technology_Ability']
   Ireland - Girls capable: N/A
   Ireland - Boys capable: N/A
   India - Girls capable: N/A
   India - Boys capable: N/A

   Gender bias (Boys - Girls scores):
   Ireland: N/A
   India: N/A

4. KEY INSIGHTS:
   • India students show higher STEM interest
   • Ireland students show higher coding