In [1]:
import os

import altair as alt
import numpy as np
import pandas as pd
# Location of the match-level CSV. Set the T20WC_DATA_PATH environment variable
# to load a copy stored elsewhere; the default keeps the original location working.
DATA_PATH = os.environ.get('T20WC_DATA_PATH', 'C:/Users/91766/Downloads/MensT20wcdata.csv')
data = pd.read_csv(DATA_PATH)

# Strip leading/trailing whitespace from the header names once, at load time,
# so every later cell can refer to columns by their clean names
# (the Venue header shows up padded in the info() report).
data.columns = data.columns.str.strip()

# Display basic information and statistical summary.
# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() -- doing so emits a stray "None" after the report.
data.info()
print(data.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Match_id                 45 non-null     int64 
 1   Team_1                   45 non-null     object
 2   Team_2                   45 non-null     object
 3                Venue       45 non-null     object
 4   Stage                    45 non-null     object
 5   Toss_winner              45 non-null     object
 6   Toss_decision            45 non-null     object
 7   Score_of_first_innings   45 non-null     object
 8   Wkts_in_first_innings    45 non-null     object
 9   Score_of_second_innings  45 non-null     object
 10  Wkts_in_second_innings   45 non-null     object
 11  Winner                   45 non-null     object
 12  Won by                   45 non-null     object
 13  Player_of_the_match      45 non-null     object
dtypes: int64(1), object(13)
memory usage: 5.1+ K

In [3]:

# Descriptive statistics (count, mean, std, min, quartiles, max) for Match_id
data.loc[:, 'Match_id'].describe()

count    45.000000
mean     23.000000
std      13.133926
min       1.000000
25%      12.000000
50%      23.000000
75%      34.000000
max      45.000000
Name: Match_id, dtype: float64

In [4]:
# a) Distribution of first-innings scores
# Entries that cannot be parsed as numbers become NaN (errors='coerce').
data['Score_of_first_innings'] = pd.to_numeric(
    data['Score_of_first_innings'],
    errors='coerce',
)

# Binned bar chart: score bins on x, number of records per bin on y.
hist_first_innings = (
    alt.Chart(data, title='Histogram of Scores in the First Innings')
    .mark_bar()
    .encode(
        x=alt.X('Score_of_first_innings', bin=True),
        y=alt.Y('count()'),
    )
)

hist_first_innings


In [5]:
# b) Matches won per team
# Tally the Winner column into a two-column frame: ['Winner', 'count'].
winner_count = (
    data['Winner']
    .value_counts()
    .rename_axis('Winner')
    .reset_index(name='count')
)

# One bar per winner, bar height = number of wins.
bar_chart_winner = (
    alt.Chart(winner_count, title='Number of Matches Won by Each Team')
    .mark_bar()
    .encode(
        x=alt.X('Winner'),
        y=alt.Y('count'),
    )
)

bar_chart_winner


In [10]:
# c) Average first- and second-innings score per venue, as a grouped bar chart
score_cols = ['Score_of_first_innings', 'Score_of_second_innings']

# Coerce both score columns to numbers; unparseable entries become NaN.
data[score_cols] = data[score_cols].apply(pd.to_numeric, errors='coerce')

# Header names may carry stray whitespace; strip it so 'Venue' can be
# addressed by its clean name below.
data.columns = data.columns.str.strip()

# Per-venue mean of each innings score, with Venue kept as a regular column.
avg_scores_by_venue = data.groupby('Venue', as_index=False)[score_cols].mean()

# Reshape wide -> long ('variable' = innings column, 'value' = mean score),
# which is the layout Altair needs for colour/column encodings.
avg_scores_by_venue = pd.melt(
    avg_scores_by_venue,
    id_vars='Venue',
    value_vars=score_cols,
)

# One facet column per venue, one bar per innings inside each facet.
grouped_bar_scores = (
    alt.Chart(
        avg_scores_by_venue,
        title='Average Scores in First and Second Innings by Venue',
    )
    .mark_bar()
    .encode(
        x=alt.X('variable:O'),
        y=alt.Y('value:Q'),
        color=alt.Color('variable:N'),
        column=alt.Column('Venue:N'),
    )
)

grouped_bar_scores


In [11]:
# d) First-innings vs. second-innings score, one point per match
# Fields shown when hovering over a point.
scatter_tooltip = [
    'Match_id',
    'Team_1',
    'Team_2',
    'Score_of_first_innings',
    'Score_of_second_innings',
    'Winner',
]

# Points are coloured by the match winner.
scatter_scores = (
    alt.Chart(data, title='Scatter Plot of Scores in First vs. Second Innings')
    .mark_circle(size=60)
    .encode(
        x=alt.X('Score_of_first_innings'),
        y=alt.Y('Score_of_second_innings'),
        color=alt.Color('Winner'),
        tooltip=scatter_tooltip,
    )
)

scatter_scores


In [12]:
# f) First-innings score plotted against Match_id
# One line per Team_1 value; hovering shows the match, team and score.
line_chart_scores = (
    alt.Chart(data, title='Scores of First Innings Over Time')
    .mark_line()
    .encode(
        x=alt.X('Match_id'),
        y=alt.Y('Score_of_first_innings'),
        color=alt.Color('Team_1'),
        tooltip=['Match_id', 'Team_1', 'Score_of_first_innings'],
    )
)

line_chart_scores


In [13]:
#g) Hybrid Chart (Scatter Plot Overlay on Heatmap)
# Background layer: 2-D binned heatmap; cell colour = number of matches whose
# (first-innings, second-innings) scores fall in that bin.
heatmap_scores = alt.Chart(data).mark_rect().encode(
    alt.X('Score_of_first_innings:Q', bin=True),
    alt.Y('Score_of_second_innings:Q', bin=True),
    color='count()'
).properties(
    title='Heatmap of Scores in First vs Second Innings'
)

# Foreground layer: one point per match, coloured by the winner.
scatter_overlay_scores = alt.Chart(data).mark_circle(size=60).encode(
    x='Score_of_first_innings',
    y='Score_of_second_innings',
    color='Winner',
    tooltip=['Match_id', 'Team_1', 'Team_2', 'Score_of_first_innings', 'Score_of_second_innings', 'Winner']
)

# Both layers use the colour channel, but for different things: a quantitative
# count (heatmap) and a nominal winner (scatter). Layered charts share scales by
# default, which would merge these into a single conflicting colour scale and
# legend. Resolve colour independently so each layer keeps its own scale/legend.
hybrid_chart_scores = (heatmap_scores + scatter_overlay_scores).resolve_scale(
    color='independent'
)
hybrid_chart_scores
