## Load the Data

In [1]:
import pandas as pd
# Read the stock-price CSV (path is relative to the working directory) into
# `data`, then print the first five rows as a quick preview.
data = pd.read_csv(filepath_or_buffer='World-Stock-Prices-Dataset.csv')
print(data.head(5))

                        Date        Open        High         Low       Close  \
0  2024-12-30 00:00:00-05:00    9.140000    9.255000    8.780000    8.960000   
1  2024-12-30 00:00:00-05:00   10.420000   10.466000   10.220000   10.400000   
2  2024-12-30 00:00:00-05:00  121.750000  124.050003  121.080002  122.389999   
3  2024-12-30 00:00:00-05:00  297.350006  299.700012  294.130005  297.329987   
4  2024-12-30 00:00:00-05:00   45.000000   45.000000   45.000000   45.000000   

       Volume  Dividends  Stock Splits        Brand_Name Ticker Industry_Tag  \
0  10714100.0        0.0           0.0           peloton   PTON      fitness   
1   7919800.0        0.0           0.0          zoominfo     ZI   technology   
2     66500.0        0.0           0.0            adidas  ADDYY      apparel   
3   2392800.0        0.0           0.0  american express    AXP      finance   
4       100.0        0.0           0.0              puma  PMMAF      apparel   

   Country  Capital Gains  
0      usa

## Inspect the Data

In [2]:
# Structural overview of the frame: column dtypes and non-null counts.
# DataFrame.info() writes its report directly to stdout and returns None, so it
# is called bare; wrapping it in print() would emit a stray "None" line.
data.info()

# Count of missing values in each column.
print(data.isnull().sum())

# Summary statistics (count, mean, std, quartiles, ...) for the numeric columns.
print(data.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301149 entries, 0 to 301148
Data columns (total 13 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   Date           301149 non-null  object 
 1   Open           301149 non-null  float64
 2   High           301149 non-null  float64
 3   Low            301149 non-null  float64
 4   Close          301149 non-null  float64
 5   Volume         301149 non-null  float64
 6   Dividends      301149 non-null  float64
 7   Stock Splits   301149 non-null  float64
 8   Brand_Name     301149 non-null  object 
 9   Ticker         301149 non-null  object 
 10  Industry_Tag   301149 non-null  object 
 11  Country        301149 non-null  object 
 12  Capital Gains  2 non-null       float64
dtypes: float64(8), object(5)
memory usage: 29.9+ MB
None
Date                  0
Open                  0
High                  0
Low                   0
Close                 0
Volume                0
Dividends   

## 1. Which stocks have the highest average closing price?

In [3]:
# Mean closing price per brand, ordered from the highest average downwards.
highest_avg_closing_price = (
    data.groupby('Brand_Name')['Close']
    .mean()
    .sort_values(ascending=False)
)
# Show only the ten brands with the highest average close.
print(highest_avg_closing_price.iloc[:10])

Brand_Name
chipotle                     637.664758
spotify                      199.412973
costco                       169.109141
coinbase                     160.874013
netflix                      159.352637
mastercard                   156.360913
zoom video communications    151.971254
adobe                        149.223389
airbnb                       139.829491
fedex                        124.311219
Name: Close, dtype: float64


## 2. Which stocks have the most ups and downs in price?

In [4]:
# Per-row trading range (High minus Low), stored as a new column on `data`.
data['Price_Range'] = data['High'].sub(data['Low'])

# Standard deviation of that range per brand, largest spread first.
volatility = (
    data.groupby('Brand_Name')['Price_Range']
    .std()
    .sort_values(ascending=False)
)
print(volatility.iloc[:10])


Brand_Name
chipotle                     17.299721
puma                          9.561596
zoom video communications     7.772880
coinbase                      7.580145
netflix                       6.652394
tesla                         5.916729
adobe                         5.287748
spotify                       4.681261
costco                        4.305046
block                         3.693704
Name: Price_Range, dtype: float64


## 3. How do stock prices differ by industry?

In [5]:
# Average closing price for every industry tag, most expensive industry first.
industry_prices = (
    data.groupby('Industry_Tag')['Close']
    .mean()
    .sort_values(ascending=False)
)
print(industry_prices)


Industry_Tag
food                  330.269366
music                 199.412973
cryptocurrency        160.874013
logistics             124.311219
entertainment         109.478107
finance                95.094659
retail                 92.142468
manufacturing          86.454537
hospitality            85.059395
financial services     81.800176
healthcare             76.854898
luxury goods           58.738559
technology             58.118998
automotive             56.849198
food & beverage        48.116298
consumer goods         46.131450
e-commerce             45.376077
apparel                40.609689
fitness                38.318827
social media           35.040361
footwear               29.875300
aviation               27.403690
gaming                 10.940576
Name: Close, dtype: float64


## 4. Is there a link between trading volume and price changes?

In [6]:
# Open-to-close move of each row, expressed as a percentage of the open,
# stored as a new column on `data`.
data['Price_Change_Percent'] = (
    data['Close'].sub(data['Open']).div(data['Open']).mul(100)
)

# Correlation (pandas default method) between traded volume and that move.
correlation = data['Volume'].corr(other=data['Price_Change_Percent'])
print(f"Correlation between Volume and Price Change: {correlation}")

Correlation between Volume and Price Change: 0.001205730240838557


## 5. How have technology stocks changed over time?

In [7]:
# Keep only the rows tagged as technology.
tech_stocks = data.loc[data['Industry_Tag'].eq('technology')]

# Mean close across those rows for each distinct Date value.
tech_trend = (
    tech_stocks.groupby('Date')['Close']
    .mean()
)
print(tech_trend)

Date
2000-01-03 00:00:00-05:00     16.126461
2000-01-04 00:00:00-05:00     15.291267
2000-01-05 00:00:00-05:00     15.492854
2000-01-06 00:00:00-05:00     15.261182
2000-01-07 00:00:00-05:00     15.854166
                                ...    
2024-12-23 00:00:00-05:00    174.172306
2024-12-24 00:00:00-05:00    175.311538
2024-12-26 00:00:00-05:00    175.168460
2024-12-27 00:00:00-05:00    173.093847
2024-12-30 00:00:00-05:00    171.600767
Name: Close, Length: 6287, dtype: float64


## 6. Which countries trade the most stocks?

In [8]:
# Total traded volume summed per country, largest total first.
country_trading = (
    data.groupby('Country')['Volume']
    .sum()
    .sort_values(ascending=False)
)
print(country_trading)

Country
usa            6.834144e+12
canada         4.069556e+10
netherlands    1.473837e+10
japan          1.406281e+10
switzerland    3.839945e+09
germany        1.035597e+09
france         7.900204e+08
Name: Volume, dtype: float64


## 7. Do dividends affect stock prices?

In [9]:
# Closing prices split by whether the row carries a positive dividend or a
# dividend of exactly zero.
with_dividends = data.loc[data['Dividends'].gt(0), 'Close']
without_dividends = data.loc[data['Dividends'].eq(0), 'Close']

# Report the mean close of each group.
print(f"Average closing price with dividends: {with_dividends.mean()}")
print(f"Average closing price without dividends: {without_dividends.mean()}")

Average closing price with dividends: 70.10270793235144
Average closing price without dividends: 73.2435608142125


## 8. What is the average trading volume for the top 10 stocks?

In [10]:
# Brand names of the first ten entries in `highest_avg_closing_price`
# (the series computed in an earlier cell).
top_10_stocks = highest_avg_closing_price.index[:10]

# Mean traded volume for each of those brands; the groupby result is ordered
# by brand name, not by price rank.
avg_volume_top_10 = (
    data.loc[data['Brand_Name'].isin(top_10_stocks)]
    .groupby('Brand_Name')['Volume']
    .mean()
)
print(avg_volume_top_10)

Brand_Name
adobe                        5.143282e+06
airbnb                       6.079107e+06
chipotle                     1.043253e+06
coinbase                     1.095625e+07
costco                       3.281232e+06
fedex                        2.147052e+06
mastercard                   1.065248e+07
netflix                      1.529734e+07
spotify                      1.843854e+06
zoom video communications    4.885597e+06
Name: Volume, dtype: float64


## 9. How do stocks in the same industry perform differently?

In [11]:
# Mean, median and standard deviation of the close for every
# (industry, brand) pair, so brands can be compared within an industry.
industry_comparison = (
    data.groupby(['Industry_Tag', 'Brand_Name'])['Close']
    .agg(['mean', 'median', 'std'])
)
print(industry_comparison)

                                              mean     median         std
Industry_Tag Brand_Name                                                  
apparel      adidas                      69.826511  49.372910   47.392709
             american eagle outfitters   11.972851  11.448223    5.984897
             nike                        43.086653  22.253480   42.136406
             puma                        49.460851  45.227180   27.403973
automotive   bmw group                   65.532294  65.053474   20.158746
...                                            ...        ...         ...
technology   philips                     24.068449  22.261827    9.826423
             salesforce / slack          92.829250  59.115000   88.180636
             uber                        44.636949  41.504999   15.750700
             zoom video communications  151.971254  84.660004  121.725067
             zoominfo                    33.964049  33.910000   17.917786

[61 rows x 3 columns]


## 10. What happens to stock prices after a stock split?

In [12]:
# Rows on which a stock split was recorded (split value greater than zero).
stock_splits = data.loc[data['Stock Splits'].gt(0)]

# Mean close per Date over those rows only. Because non-split rows are
# filtered out first, this reflects prices on the split days themselves,
# not the price movement in the days that follow a split.
price_trend = (
    stock_splits.groupby('Date')['Close']
    .mean()
)
print(price_trend)

Date
2000-01-14 00:00:00-05:00     33.956917
2000-03-23 00:00:00-05:00     54.000790
2000-04-18 00:00:00-04:00     22.822477
2000-05-11 00:00:00-04:00     31.452337
2000-06-12 00:00:00-04:00     24.509317
                                ...    
2022-08-25 00:00:00-04:00    296.070007
2022-10-04 00:00:00-04:00     10.740000
2024-04-01 00:00:00-04:00     93.371521
2024-06-10 00:00:00-04:00    121.779999
2024-06-26 00:00:00-04:00     65.860001
Name: Close, Length: 65, dtype: float64
