In [20]:
import pandas as pd

df = pd.read_csv("bloomberg_data.csv")

# Basic structure of the dataset (columns, non-null counts, dtypes).
# DataFrame.info() prints its report directly and returns None, so it is
# called for its printed output only; assigning the result would just store
# None and put a stray `None` at the front of the cell's displayed value.
df.info()

# Summary statistics for the numerical columns
df_description = df.describe()

# Number of missing values per column
missing_values = df.isnull().sum()

# Number of fully duplicated rows
duplicate_count = df.duplicated().sum()

# Number of distinct values per column
unique_counts = df.nunique()

# Show the computed summaries as the cell's result
df_description, missing_values, duplicate_count, unique_counts

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3980 entries, 0 to 3979
Data columns (total 26 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Issuer Ticker                3978 non-null   object 
 1   Issuer Name                  3978 non-null   object 
 2   Sales - 1 Yr Growth          2535 non-null   float64
 3   Profit Margin                2709 non-null   float64
 4   Return on Assets             3328 non-null   float64
 5   Offer Size (M)               3980 non-null   float64
 6   Shares Outstanding (M)       3980 non-null   float64
 7   Offer Price                  3980 non-null   float64
 8   Offer To 1st Close           3933 non-null   float64
 9   Market Cap at Offer (M)      3980 non-null   float64
 10  Trade Date (US)              3980 non-null   object 
 11  cusip                        3978 non-null   object 
 12  Cash Flow per Share          2965 non-null   float64
 13  Offer Size (M).1  

(None,
        Sales - 1 Yr Growth  Profit Margin  Return on Assets  Offer Size (M)  \
 count          2535.000000    2709.000000       3328.000000     3980.000000   
 mean            466.440399    -734.150672         -4.857451      281.040474   
 std           13916.890782   12073.730675        820.334548      685.860848   
 min           -1025.400000 -444661.000000      -3400.000000        0.100000   
 25%              -0.600000     -44.700000        -27.600000       70.003000   
 50%              14.400000      -1.900000         -1.400000      136.562000   
 75%              38.500000      10.100000          3.100000      277.777000   
 max          662658.000000   36561.800000      47045.400000    19650.400000   
 
        Shares Outstanding (M)  Offer Price  Offer To 1st Close  \
 count             3980.000000  3980.000000         3933.000000   
 mean                38.477917    14.764685           14.578712   
 std                 84.787259     6.866491           34.709972   
 mi

In [None]:
# Re-load necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Reload the dataset
df_new = pd.read_csv("bloomberg_data.csv")

# Convert 'Trade Date (US)' to datetime; values that cannot be parsed
# become NaT because of errors='coerce'.
df_new['Trade Date (US)'] = pd.to_datetime(df_new['Trade Date (US)'], errors='coerce')


def _plot_distribution(values, label, title):
    """Draw a 30-bin histogram with a KDE overlay for one column.

    Parameters
    ----------
    values : pandas.Series
        Column to plot; missing values are dropped before plotting.
    label : str
        Text used for the x-axis label.
    title : str
        Figure title.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.histplot(values.dropna(), bins=30, kde=True, ax=ax)
    ax.set_xlabel(label)
    ax.set_ylabel('Frequency')
    ax.set_title(title)
    plt.show()


# Distribution of IPO Offer Prices
_plot_distribution(df_new['Offer Price'], 'Offer Price', 'Distribution of IPO Offer Prices')

# Distribution of Market Capitalization at Offer
_plot_distribution(
    df_new['Market Cap at Offer (M)'],
    'Market Cap at Offer (M)',
    'Distribution of Market Capitalization at Offer',
)

# Number of IPOs over time.
# The yearly counts are derived from the date column itself instead of
# re-indexing df_new in place and adding a helper column, so the frame keeps
# its original shape for the plots below and for any later cell.
# Grouping by calendar-year period also avoids the 'Y' resample alias, which
# newer pandas releases deprecate in favour of 'YE'.
ipo_years = df_new['Trade Date (US)'].dropna().dt.to_period('Y')
ipo_trend = ipo_years.value_counts().sort_index()
# Insert explicit zeros for years without any IPO so the line does not
# silently bridge over gaps.
ipo_trend = ipo_trend.reindex(
    pd.period_range(ipo_trend.index.min(), ipo_trend.index.max(), freq='Y'),
    fill_value=0,
)

fig, ax = plt.subplots(figsize=(10, 5))
ipo_trend.plot(ax=ax)
ax.set_xlabel('Year')
ax.set_ylabel('Number of IPOs')
ax.set_title('IPO Trends Over Time')
ax.grid(True)
plt.show()

# Industry Sector Distribution
fig, ax = plt.subplots(figsize=(10, 5))
df_new['Industry Sector'].value_counts().plot(kind='bar', ax=ax)
ax.set_xlabel('Industry Sector')
ax.set_ylabel('Count')
ax.set_title('Distribution of IPOs by Industry Sector')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

# Correlation Matrix (restricted to float64 columns, as selected below)
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(
    df_new.select_dtypes(include=['float64']).corr(),
    annot=False,
    cmap='coolwarm',
    ax=ax,
)
ax.set_title('Correlation Matrix of Numeric Features')
plt.show()