In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Kaggle "Top 50 US Tech Companies 2022 - 2023" CSV into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/top-50-us-tech-companies-2022-2023-dataset/Top 50 US Tech Companies 2022 - 2023.csv',
)

In [3]:
# Print a structural summary of the frame: row count, column names,
# non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 10 columns):
 #   Column                                            Non-Null Count  Dtype  
---  ------                                            --------------  -----  
 0   Company Name                                      50 non-null     object 
 1   Industry                                          50 non-null     object 
 2   Sector                                            50 non-null     object 
 3   HQ State                                          50 non-null     object 
 4   Founding Year                                     50 non-null     int64  
 5   Annual Revenue 2022-2023 (USD in Billions)        50 non-null     float64
 6   Market Cap (USD in Trillions)                     50 non-null     float64
 7   Stock Name                                        50 non-null     object 
 8   Annual Income Tax in 2022-2023 (USD in Billions)  50 non-null     float64
 9   Employee Size          

In [4]:
# Summarise the revenue, market-cap, income-tax and headcount columns
# (count, mean, std, min, quartiles, max) and print the resulting table.
summary_columns = [
    'Annual Revenue 2022-2023 (USD in Billions)',
    'Market Cap (USD in Trillions)',
    'Annual Income Tax in 2022-2023 (USD in Billions)',
    'Employee Size',
]
desc_stats = df.loc[:, summary_columns].describe()

print(desc_stats)


       Annual Revenue 2022-2023 (USD in Billions)  \
count                                    50.00000   
mean                                     51.20440   
std                                      97.41288   
min                                       2.06000   
25%                                       7.65250   
50%                                      17.66500   
75%                                      40.81500   
max                                     513.98000   

       Market Cap (USD in Trillions)  \
count                      50.000000   
mean                        0.252160   
std                         0.490377   
min                         0.028000   
25%                         0.051250   
50%                         0.082500   
75%                         0.160250   
max                         2.520000   

       Annual Income Tax in 2022-2023 (USD in Billions)  Employee Size  
count                                         50.000000   5.000000e+01  
mean           

# Basic descriptive statistics for the numerical variables in the dataset

In [5]:
import pandas as pd
import plotly.express as px
# Numeric columns shared by the correlation heatmap and the scatter matrix.
num_cols = [
    'Founding Year',
    'Annual Revenue 2022-2023 (USD in Billions)',
    'Market Cap (USD in Trillions)',
    'Annual Income Tax in 2022-2023 (USD in Billions)',
    'Employee Size',
]
num_df = df.loc[:, num_cols]
corr_matrix = num_df.corr()

# Heatmap of the pairwise correlations between the numeric columns.
fig1 = px.imshow(
    corr_matrix,
    color_continuous_scale='Viridis',
).update_layout(title='Correlation Matrix')

# Pairwise scatter plots coloured by market cap; the diagonal panels are hidden.
fig2 = px.scatter_matrix(
    num_df,
    dimensions=num_cols,
    color='Market Cap (USD in Trillions)',
).update_traces(diagonal_visible=False)

fig1.show()
fig2.show()

# Industry and sector analysis

In [6]:



# Column labels reused across the summaries and charts below.
avg_revenue = 'Annual Revenue 2022-2023 (USD in Billions)'
market_cap = 'Market Cap (USD in Trillions)'
headcount = 'Employee Size'
averaged_metrics = [avg_revenue, market_cap, headcount]

# Mean revenue, market cap and employee size per industry and per sector.
industry_summary = df.groupby('Industry')[averaged_metrics].mean().reset_index()
sector_summary = df.groupby('Sector')[averaged_metrics].mean().reset_index()

# Average revenue as bars, one chart per grouping.
fig1 = px.bar(
    industry_summary, x='Industry', y=avg_revenue,
    color='Industry', title='Average Revenue by Industry',
)
fig2 = px.bar(
    sector_summary, x='Sector', y=avg_revenue,
    color='Sector', title='Average Revenue by Sector',
)

# Company-level market cap against employee size, coloured by each grouping.
fig3 = px.scatter(
    df, x=market_cap, y=headcount,
    color='Industry', title='Market Cap vs Employee Size by Industry',
)
fig4 = px.scatter(
    df, x=market_cap, y=headcount,
    color='Sector', title='Market Cap vs Employee Size by Sector',
)

for chart in (fig1, fig2, fig3, fig4):
    chart.show()


# Number of Top Tech Companies by State

In [7]:
import plotly.express as px

# Count the rows (companies) recorded for each headquarters state.
state_counts = (
    df.groupby('HQ State')
    .size()
    .reset_index(name='Counts')
)

# Bar chart of those counts, shaded by count, with x-axis tick labels at -45 degrees.
fig = px.bar(
    state_counts,
    x='HQ State',
    y='Counts',
    color='Counts',
    color_continuous_scale='Blues',
    labels={'HQ State': 'State', 'Counts': 'Number of Companies'},
    title='Number of Top Tech Companies by State',
).update_layout(xaxis_tickangle=-45)

fig.show()


# Annual Revenue and Income Tax by Founding Year

In [8]:
import plotly.graph_objs as go
# Bind `fig` to a new, empty Plotly graph_objs figure.
fig = go.Figure()

In [9]:
# Plot each company's 2022-2023 revenue and income tax against its founding year.
#
# The figure is rebuilt from scratch in this cell so that re-running it does not
# stack duplicate traces onto a `fig` left over from an earlier execution.
fig = go.Figure()

revenue_col = 'Annual Revenue 2022-2023 (USD in Billions)'
income_tax_col = 'Annual Income Tax in 2022-2023 (USD in Billions)'

# One pass over the frame instead of re-filtering it for every company.
# sort=False keeps companies in order of first appearance, as unique() did.
for company, company_data in df.groupby('Company Name', sort=False):
    # Solid line: revenue. The legend name carries the metric so the two traces
    # of a company can be told apart; legendgroup toggles them together.
    fig.add_trace(go.Scatter(x=company_data['Founding Year'], y=company_data[revenue_col],
                             mode='lines+markers', name=f'{company} - Revenue',
                             legendgroup=company, line=dict(width=2), connectgaps=True))

    # Dotted line: income tax for the same company.
    fig.add_trace(go.Scatter(x=company_data['Founding Year'], y=company_data[income_tax_col],
                             mode='lines+markers', name=f'{company} - Income Tax',
                             legendgroup=company, line=dict(width=2, dash='dot'), connectgaps=True))

# The x-axis is the founding year and both metrics cover the single 2022-2023
# period, so the title describes the chart as "by Founding Year", not "over Time".
# NOTE(review): presumably one row per company, so each trace is a single marker - confirm.
fig.update_layout(title='Annual Revenue and Income Tax by Founding Year',
                  xaxis_title='Founding Year',
                  yaxis_title='USD in Billions',
                  legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1))

fig.show()