# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress  # Import linregress directly from scipy.stats

# Step 1: Import the data
# Load the CSV into the module-level DataFrame `df`. The plotting functions
# below read it as a global and use its 'Year' and 'CSIRO Adjusted Sea Level'
# columns. The path is relative, so the file is looked up from the current
# working directory.
df = pd.read_csv('epa-sea-level.csv')

# Step 2: Create a scatter plot
def scatterPlot():
    """Show a scatter plot of sea level against year for every row of ``df``.

    Reads the module-level ``df`` ('Year' on the x-axis,
    'CSIRO Adjusted Sea Level' on the y-axis), draws onto a new 10x8 figure
    and displays it with ``plt.show()``. Returns None.
    """
    marker_style = {'color': 'blue', 'alpha': 0.5}

    plt.figure(figsize=(10, 8))
    years = df['Year']
    sea_levels = df['CSIRO Adjusted Sea Level']
    plt.scatter(years, sea_levels, **marker_style)

    # Decorate the axes, then hand the finished figure to the backend.
    plt.title('Scatter Plot of Sea Level by Year')
    plt.ylabel('CSIRO Adjusted Sea Level (inches)')
    plt.xlabel('Year')
    plt.grid(True)
    plt.show()

# Draw the scatter plot as soon as this script/cell is executed.
scatterPlot()

# Step 3: Line regression for the entire dataset
def line_regression(predict_till=2050):
    """Fit a line to the whole record and plot it extended to a target year.

    Runs a linear least-squares regression (``scipy.stats.linregress``) of
    ``df['CSIRO Adjusted Sea Level']`` on ``df['Year']`` using the
    module-level ``df``. The figure shows the observations, the fitted line
    drawn from the earliest recorded year through ``predict_till``, the
    predicted value at ``predict_till`` and a dashed vertical marker at that
    year.

    Args:
        predict_till: Year to extrapolate the fitted line to. Defaults to
            2050, which reproduces the previously hard-coded plot exactly.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    year = df['Year']
    level_sea = df['CSIRO Adjusted Sea Level']

    # Only slope and intercept are needed; the other regression statistics
    # (r, p, stderr) are not used by the plot.
    fit = linregress(year, level_sea)
    slope, intercept = fit.slope, fit.intercept

    # Predicted sea level at the target year.
    predicted_sea_level = slope * predict_till + intercept

    # np.arange excludes its stop value, hence the +1 to include the target year.
    extended_year = np.arange(year.min(), predict_till + 1)
    line_of_best_fit = slope * extended_year + intercept

    plt.figure(figsize=(10, 8))
    plt.scatter(year, level_sea, label='Observed Data', color='blue', alpha=0.5)
    plt.plot(extended_year, line_of_best_fit, color='red', label='Best Fit Line')
    plt.scatter(predict_till, predicted_sea_level, color='green',
                label=f'Prediction for {predict_till}')

    plt.xlabel('Year')
    plt.ylabel('Sea Level (inches)')
    # The marker and the legend text follow predict_till instead of a separate
    # literal, so they cannot drift away from the prediction point.
    plt.axvline(x=predict_till, linestyle='--', color='gray',
                label=f'Year {predict_till}')
    plt.title('Sea Level Prediction')
    plt.legend()
    plt.grid()
    plt.show()

# Run the full-record regression and show its plot when this script/cell is executed.
line_regression()

# Step 4: Line regression for data from the year 2000 onward
def data_2000(start_year=2000, year_predict=2050):
    """Fit a line to the recent record only and plot it extended to a target year.

    Keeps the rows of the module-level ``df`` whose 'Year' is at least
    ``start_year``, runs a linear least-squares regression
    (``scipy.stats.linregress``) of 'CSIRO Adjusted Sea Level' on 'Year' over
    that subset, and shows the subset, the fitted line drawn from the first
    retained year through ``year_predict``, the predicted value at
    ``year_predict`` and a dashed vertical marker at that year.

    Args:
        start_year: First year (inclusive) to include in the fit. Defaults
            to 2000.
        year_predict: Year to extrapolate the fitted line to. Defaults to
            2050.

    With both defaults the figure is identical to the previously hard-coded
    version.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    recent = df[df['Year'] >= start_year]

    year = recent['Year']
    level = recent['CSIRO Adjusted Sea Level']

    # Only slope and intercept are needed; the other regression statistics
    # (r, p, stderr) are not used by the plot.
    fit = linregress(year, level)
    slope, intercept = fit.slope, fit.intercept

    # Predicted sea level at the target year.
    line_predict = slope * year_predict + intercept

    # np.arange excludes its stop value, hence the +1 to include the target year.
    extended_years = np.arange(year.min(), year_predict + 1)
    line_extended = slope * extended_years + intercept

    plt.figure(figsize=(10, 8))
    plt.scatter(year, level, label='Observed Data', color='blue', alpha=0.5)
    plt.plot(extended_years, line_extended, color='red',
             label=f'Best Fit Line (from {start_year} onward)')
    plt.scatter(year_predict, line_predict, color='green',
                label=f'Prediction for {year_predict}')

    plt.xlabel('Year')
    plt.ylabel('Sea Level (inches)')
    # Marker, legend and title are derived from the parameters so the text
    # always matches what was actually fitted and predicted.
    plt.axvline(x=year_predict, color='gray', linestyle='--',
                label=f'Year {year_predict}')
    plt.title(f'Prediction of Sea Level Rise from {start_year} to {year_predict}')
    plt.legend()
    plt.grid()
    plt.show()

# Run the year-2000-onward regression and show its plot when this script/cell is executed.
data_2000()
