In [2]:
# Select kernel: Python 3.12.7

Problem 1: NASA APOD Data Retrieval and JSON File Processing

In [None]:
from dotenv import load_dotenv
import os
import json

# Load variables from a local .env file into the process environment.
load_dotenv()

key = os.getenv('API_KEY')
api = os.getenv('API_URL')

# Later cells build request URLs from these values; flag missing ones here
# instead of failing later with a confusing "None/planetary/apod" URL.
missing = [name for name, value in (('API_KEY', key), ('API_URL', api)) if not value]
if missing:
    print(f"Warning: missing environment variable(s): {', '.join(missing)}")

# Confirm the configuration was picked up (the key itself is never printed).
print(f"API URL is: {api}")

In [None]:
# APICall
import requests

def get_apod_data(api_key, date):
    """Fetch one Astronomy Picture of the Day record.

    The base URL is read from the module-level ``api`` variable, which the
    notebook loads from the ``API_URL`` environment variable.

    Args:
        api_key: API key sent as the ``api_key`` query parameter.
        date: Day to fetch, formatted ``YYYY-MM-DD``.

    Returns:
        A dict with the keys ``date``, ``title``, ``url``, ``explanation`` and
        ``media_type`` on success, or ``None`` when the request fails or the
        response does not contain all of those keys (a message is printed).
    """
    url = f"{api}/planetary/apod"
    # Let requests build and encode the query string.
    params = {"api_key": api_key, "date": date}
    wanted_fields = ("date", "title", "url", "explanation", "media_type")

    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(url, params=params, timeout=30)
        # Raise for any non-2xx status.
        response.raise_for_status()
        data = response.json()
        return {field: data[field] for field in wanted_fields}
    except requests.exceptions.RequestException as error:
        print("got error at RequestException")
        # HTTP error messages include the full request URL; mask the key so it
        # does not end up in saved notebook output.
        message = str(error)
        if api_key:
            message = message.replace(str(api_key), "***")
        print(message)
    except (KeyError, TypeError, ValueError):
        # Missing field, unexpected JSON shape, or a body that is not JSON.
        print("Invalid response")

    return None

# Smoke test: fetch a single day and show what comes back.
# NOTE: despite its name, `date` holds the whole value returned by
# get_apod_data (a record dict, or None if the call failed), not a date string.
date = get_apod_data(key , "2020-01-01")

print(date)

In [70]:
# Fetching Data for a DateRange
import time
from datetime import datetime , timedelta

def fetch_multiple_apod_data(api_key, start_date, end_date, delay=1):
    """Fetch APOD records for every day in an inclusive date range.

    Args:
        api_key: API key forwarded to ``get_apod_data``.
        start_date: First day to fetch, formatted ``YYYY-MM-DD``.
        end_date: Last day to fetch (inclusive), formatted ``YYYY-MM-DD``.
        delay: Seconds to pause between consecutive requests (default 1).

    Returns:
        List of the records returned by ``get_apod_data``, in date order.
        Days whose fetch failed are left out. The list is empty when
        ``start_date`` is after ``end_date``.

    Raises:
        ValueError: If either date string does not match ``YYYY-MM-DD``.
    """
    current_day = datetime.strptime(start_date, "%Y-%m-%d")
    last_day = datetime.strptime(end_date, "%Y-%m-%d")

    records = []
    while current_day <= last_day:
        record = get_apod_data(api_key, current_day.strftime("%Y-%m-%d"))
        # get_apod_data returns None on failure; storing that would put null
        # entries in the saved JSON and break code that indexes each record.
        if record is not None:
            records.append(record)

        current_day += timedelta(days=1)
        # Pause between requests, but not after the final one.
        if delay > 0 and current_day <= last_day:
            time.sleep(delay)

    return records

start_date = "2021-01-01"
end_date = "2021-12-31"
# One request per day in the range, so this cell is slow to run.
year_data = fetch_multiple_apod_data(key, start_date, end_date)


file_name = "retrieved_data.json"

# Overwrite the file with the freshly fetched records. Opening in 'w' mode
# creates the file if needed. Reading the old contents first is unnecessary
# because they are replaced wholesale, and a corrupt old file would otherwise
# abort the save.
with open(file_name, 'w') as file:
    json.dump(year_data, file, indent=4)




Problem 2: JSON Data Reading, Looping, and Processing

In [None]:
def read_apod_data(file_name="retrieved_data.json"):
    """Load the saved APOD records from a JSON file.

    Args:
        file_name: Path of the JSON file to read. Defaults to the file written
            by the fetch cell, ``retrieved_data.json``.

    Returns:
        The parsed JSON content, or ``None`` if the file does not exist or
        does not contain valid JSON (a message naming the file is printed).
    """
    try:
        with open(file_name, "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError:
        # Report the file that was actually requested.
        print(f"Error: '{file_name}' file not found.")
    except json.JSONDecodeError:
        print(f"Error: '{file_name}' contains invalid JSON.")
    return None


data = read_apod_data()

# read_apod_data() returns None when the file is missing or invalid, and the
# saved list can contain null entries for days whose fetch failed; skip both
# rather than crash.
for record in (data if data is not None else []):
    if not isinstance(record, dict):
        continue
    print({
            "date": record["date"],
            "title": record["title"]
         })

In [None]:
def analyze_apod_media(data):
    """Print media-type counts and the record with the longest explanation.

    Args:
        data: Iterable of APOD record dicts, or ``None``. Entries that are not
            dicts (for example nulls from failed fetches) are ignored.

    Returns:
        None. Results are printed.
    """
    records = [
        record
        for record in (data if data is not None else [])
        if isinstance(record, dict)
    ]

    # Count media types; a record without "media_type" counts as neither.
    totalImage = sum(1 for record in records if record.get("media_type") == "image")
    totalVideo = sum(1 for record in records if record.get("media_type") == "video")

    print(f"Total count of Images: {totalImage}")
    print(f"Total count of Videos: {totalVideo}")

    # max() raises ValueError on an empty sequence, so stop here if nothing usable was loaded.
    if not records:
        print("No APOD records to analyze.")
        return

    # Treat a missing or null explanation as an empty string.
    longest_text = max(records, key=lambda entry: len(entry.get("explanation") or ""))
    print("Entry with the longest explanation:")
    print(json.dumps(longest_text, indent=4))

# Summarise the records loaded into `data` by read_apod_data().
analyze_apod_media(data)

In [None]:
import json
import csv
import os

def write_apod_to_csv(json_file, csv_file):
    """Append a summary of the APOD records in a JSON file to a CSV file.

    Only the ``date``, ``title``, ``media_type`` and ``url`` fields are
    written. The CSV is opened in append mode, so calling this again for the
    same input adds the same rows a second time.

    Args:
        json_file: Path of the JSON file holding a list of record dicts.
        csv_file: Path of the CSV file to append to (created if missing).

    Returns:
        None. A success or error message is printed.
    """
    fieldnames = ['date', 'title', 'media_type', 'url']

    try:
        with open(json_file, "r", encoding="utf-8") as file:
            data = json.load(file)

        # A header is needed when the file is new, and also when it exists
        # but is still empty.
        needs_header = (not os.path.exists(csv_file)) or os.path.getsize(csv_file) == 0

        # UTF-8 so titles with non-ASCII characters can always be written.
        with open(csv_file, mode='a', newline='', encoding="utf-8") as csvfile:
            # extrasaction='ignore' lets whole records be passed through while
            # only the summary columns are emitted.
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames, extrasaction='ignore')

            if needs_header:
                writer.writeheader()

            for entry in data:
                # Skip null or malformed entries instead of aborting the
                # export half-way through.
                if not isinstance(entry, dict):
                    continue
                writer.writerow(entry)

        print(f"Data successfully written to {csv_file}.")

    except FileNotFoundError:
        print(f"Error: File '{json_file}' not found.")
    except json.JSONDecodeError:
        print("Error: Invalid JSON format in the file.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


# Export the saved APOD records to a CSV summary (rows are appended each time this cell runs).
write_apod_to_csv("retrieved_data.json", "apod_summary.csv")


Problem 3: NumPy Array Manipulation and Statistical Functions

In [2]:
import numpy as np

In [8]:
# Generate a 20-row by 5-column array of random integers from 10 up to (but not
# including) 100 and store it in `data`. A fixed seed makes Restart & Run All
# reproduce the same array.
SEED = 42
data = np.random.default_rng(SEED).integers(10, 100, size=(20, 5))


In [None]:
# Display the current contents of `data` as the cell output.
data

In [None]:
# Calculate the sum of each row and make every row sum even.
for d in range(data.shape[0]):
    rowSum = np.sum(data[d])
    # An odd sum is fixed by subtracting 1 from the first element of the row.
    if rowSum % 2 != 0:
        data[d, 0] -= 1
        # Keep the reported value in step with the adjusted row.
        rowSum -= 1
    print(rowSum)
    



In [None]:
# Calculate the total sum and its remainder modulo 5.
SumOfValues = np.sum(data)
remaining = SumOfValues % 5

if remaining == 0:
    print(f"Orginal Sum is a multiple of 5: {SumOfValues}")
else:
    # Adjust the first element so the total becomes a multiple of 5.
    # The adjustment is always an even amount (-2, -4, +4 or +2), so the
    # parity of row 0's sum is left unchanged.
    if remaining % 2 == 0:
        data[0, 0] -= remaining
    else:
        data[0, 0] += (5 - remaining)
    print(f"Updated Array:\n{data}")
    print(f"Updated Sum: {np.sum(data)}")

In [None]:
# Collect every value that is divisible by both 3 and 5, scanning row by row.
divisible = [
    value
    for row_values in data
    for value in row_values
    if value % 3 == 0 and value % 5 == 0
]
print(f"Numbers divisible by 3 and 5 are:\n {divisible}")

In [None]:

# Replace elements greater than 75 with the mean value.
# Assigning a float into an integer array silently drops the fractional part,
# so round the mean to the nearest integer explicitly before storing it.
mean_replacement = int(round(float(np.mean(data))))
data[data > 75] = mean_replacement

print(f"Edited Array:\n {data}")


In [None]:
# Whole-array statistics first, then the per-column variance (axis=0).
mean_value = data.mean()
std_dev = data.std()
median_value = np.median(data)
variance_columns = data.var(axis=0)

print(f"\nMean of the array: {mean_value}")
print(f"Standard Deviation of the array: {std_dev}")
print(f"Median of the array: {median_value}")
print(f"Variance for each column: {variance_columns}")


Problem 4: Working with Pandas DataFrames

In [17]:
# Import libraries.
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt


In [18]:
# Read iris.csv into a pandas DataFrame and display it.
irisData= pd.read_csv('iris.csv')
irisData

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [None]:
# Size of the DataFrame as (rows, columns).
irisData.shape

In [None]:
# Per-column data types and non-null counts.
irisData.info()

In [None]:
# Names of all the columns in the DataFrame.
irisData.columns

In [None]:
# Distinct values found in the Species column.
irisData.Species.unique()

In [None]:
# Show the rows at positions 35 and 38, the two rows overwritten by the update cell that follows.
affectedRows = irisData.iloc[[35, 38]]
affectedRows

In [24]:
# Overwrite the rows at positions 35 and 38 with corrected values.
# Columns are addressed by name so this cell still works if it is re-run after
# later cells have added extra columns; a positional 5-value assignment would
# then fail with a length mismatch.
corrected_columns = ['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width', 'Species']
irisData.loc[irisData.index[35], corrected_columns] = [4.9, 3.1, 1.5, 0.2, "setosa"]
irisData.loc[irisData.index[38], corrected_columns] = [4.9, 3.6, 1.4, 0.1, "setosa"]

In [25]:
# Re-select the rows at positions 35 and 38 to confirm they now hold the new values.
updatedRows = irisData.iloc[[35,38]] 
print(updatedRows)

    Sepal.Length  Sepal.Width  Petal.Length  Petal.Width Species
35           4.9          3.1           1.5          0.2  setosa
38           4.9          3.6           1.4          0.1  setosa


In [None]:
# Derived features: length divided by width, for petals and for sepals.
irisData['Petal.Ratio'] = irisData['Petal.Length'].div(irisData['Petal.Width'])
irisData['Sepal.Ratio'] = irisData['Sepal.Length'].div(irisData['Sepal.Width'])
irisData

In [27]:
# Save to CSV as iris_corrected.csv, leaving out the row index.
irisData.to_csv("iris_corrected.csv", index=False)

In [None]:
# Pairwise correlation of the numeric features.
# The categorical Species column is dropped first.
numericColumns = irisData.drop(columns='Species')
correlation_matrix = numericColumns.corr()
correlation_matrix

In [None]:
# scatter plot with Sepal Ratio on the x-axis and Petal Ratio on the y-axis
from scipy.stats import linregress
fig, ax = plt.subplots(figsize=(8, 5))
# regplot draws the scatter points and the fitted line together, so a separate
# plt.scatter call would only plot every point twice. Keep the points green via
# scatter_kws.
sns.regplot(
    data=irisData,
    x='Sepal.Ratio',
    y='Petal.Ratio',
    scatter_kws={'color': 'green'},
    ax=ax,
)
# Set the title and labels after regplot, which applies its own axis labels.
# The y-axis is a plain ratio, not a percentage.
ax.set_title('Sepal.Ratio vs Petal.Ratio')
ax.set_xlabel('Sepal.Ratio')
ax.set_ylabel('Petal.Ratio')
fig.savefig("iris_scatter_with_regression.pdf", dpi=300)
plt.show()

In [None]:
# Pair plot of every column in numericColumns.
sns.pairplot(numericColumns)
plt.show()