# Dataset Exploration

This notebook explores the datasets I collected, examining their structure, data types, and content.

In [None]:
import pandas as pd
import numpy as np

# Read the three stock CSVs, in order, from the notebook's working directory.
# The resulting frame names are reused by the cells further down.
apple_df, google_df, tesla_df = map(
    pd.read_csv,
    ("apple_stock.csv", "google_stock.csv", "tesla_stock.csv"),
)

# One (rows, columns) line per dataset as a quick check that each load worked.
shape_report = (
    f"Apple dataset shape: {apple_df.shape}",
    f"Google dataset shape: {google_df.shape}",
    f"Tesla dataset shape: {tesla_df.shape}",
)
print(*shape_report, sep="\n")

: 

## Dataset 1: Apple Stock 

In [None]:
# Show the last seven rows of the Apple frame.
print(apple_df.tail(7))

# Columns whose dtype is numeric; `numerical_columns` is reused by the next cell.
numerical_columns = apple_df.select_dtypes(include=[np.number]).columns
print(numerical_columns.tolist())

# Pull the numeric columns out as a NumPy array, then show its shape and
# its first five rows.
apple_numerical = apple_df[numerical_columns].to_numpy()
print(apple_numerical.shape)
print(apple_numerical[:5])


In [None]:
# Column counts: total, numeric (from `numerical_columns`, computed in the
# previous cell), and the remainder.
column_summary = (
    f"Total columns: {len(apple_df.columns)}",
    f"Numerical columns: {len(numerical_columns)}",
    f"Non-numerical columns: {len(apple_df.columns) - len(numerical_columns)}",
)
print(*column_summary, sep="\n")

# Number of missing values in each column (`isna` is the same check as `isnull`).
print(apple_df.isna().sum())

# The dtype pandas assigned to each column.
print(apple_df.dtypes)


## Dataset 2: Google Stock

Let's examine the Google dataset structure and data types.

In [None]:
# Peek at the first two rows, then list the dtype pandas assigned to each column.
print(google_df.head(2))

print(google_df.dtypes)

# Walk the columns one by one and flag any object-dtype column whose values
# would all parse as numbers, i.e. numeric data that was read in as strings.
for col in google_df.columns:
    dtype = google_df[col].dtype
    print(f"{col}: {dtype}")

    # Only object-dtype columns can be hiding numbers stored as strings.
    # `continue` (not `return`) is required here: this is top-level cell code,
    # and `return` outside a function is a SyntaxError.
    if dtype != 'object':
        continue

    try:
        # errors='raise' makes to_numeric fail on the first unparseable value.
        pd.to_numeric(google_df[col], errors='raise')
    except (ValueError, TypeError):
        # At least one value is not numeric, so the column really is text.
        continue

    print(f"  -> {col} is stored as text but every value parses as a number")


## Dataset 3: Tesla Stock

Let's examine the Tesla dataset by looking at specific rows and columns.


In [None]:
# Numeric columns of the Tesla frame; the first two (if present) are displayed.
numerical_cols = tesla_df.select_dtypes(include=[np.number]).columns

# 0-based row positions to inspect. Positions past the end of the frame are
# dropped here, before any .iloc call, because .iloc with a list of positions
# raises IndexError as soon as one of them is out of range.
selected_rows = [idx for idx in (2, 5, 8) if idx < len(tesla_df)]

if len(numerical_cols) >= 2:
    col1, col2 = numerical_cols[0], numerical_cols[1]
    print(f"Selected columns: {col1} and {col2}")

    # The chosen rows restricted to the two chosen columns.
    selected_data = tesla_df.iloc[selected_rows][[col1, col2]]
    print(selected_data)

    # Same values again, one line per row, labelled with a 1-based row number
    # and formatted to two decimal places.
    for idx in selected_rows:
        print(
            f"Row {idx + 1}: "
            f"{col1}={tesla_df[col1].iloc[idx]:.2f}, "
            f"{col2}={tesla_df[col2].iloc[idx]:.2f}"
        )
else:
    # Fewer than two numeric columns: show the chosen rows with every column.
    print(tesla_df.iloc[selected_rows])
