In [1]:
from http.server import BaseHTTPRequestHandler, HTTPServer
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum
import os
from glob import glob
import json

class HTTPRequestHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing ``GET /least_suffering_country``.

    The endpoint reads every ``*.csv`` file in ``data_directory`` with Spark,
    sums the ``critical`` column per ``country`` and answers with the country
    whose total is smallest, as ``{"least_suffering_country": <name>}``.
    Any other path receives a plain ``404 Not Found``.
    """

    # Directory scanned for input CSV files. Kept as a class attribute so a
    # subclass or instance can point the handler at a different location.
    data_directory = "/Users/meghasingh/Desktop/Spark assignment/"

    def do_GET(self):
        """Dispatch a GET request to the single supported endpoint."""
        # Ignore any query string so "/least_suffering_country?x=1" still matches.
        route = self.path.split('?', 1)[0]
        if route != '/least_suffering_country':
            # Send 404 response for other paths
            self.send_response(404)
            self.end_headers()
            self.wfile.write(b'404 Not Found')
            return

        try:
            country = self._find_least_suffering_country()
        except LookupError as exc:
            # Raised by the helper with a client-safe message (no data to use).
            self.log_error("No data for %s: %s", route, exc)
            self._send_json(500, {'error': str(exc)})
            return
        except Exception as exc:
            # Request boundary: always answer the client instead of dropping
            # the connection; details go to the server log only.
            self.log_error("Failed to handle %s: %r", route, exc)
            self._send_json(500, {'error': 'Internal Server Error'})
            return

        self._send_json(200, {'least_suffering_country': country})

    def _find_least_suffering_country(self):
        """Return the country with the smallest total of ``critical`` cases.

        Raises:
            LookupError: if ``data_directory`` holds no CSV files or the
                files contain no rows.
        """
        # Find the input files before starting Spark so a missing or empty
        # directory fails fast without paying for a session start-up.
        file_paths = glob(os.path.join(self.data_directory, "*.csv"))
        if not file_paths:
            raise LookupError("no CSV files found in the data directory")

        spark = SparkSession.builder \
            .appName("Least Suffering Country") \
            .getOrCreate()
        try:
            frames = [
                spark.read.csv(file_path, header=True, inferSchema=True)
                for file_path in file_paths
            ]

            # unionByName matches columns by header name; plain union() matches
            # by position and silently mixes columns if files order them differently.
            merged_df = frames[0]
            for frame in frames[1:]:
                merged_df = merged_df.unionByName(frame)

            # NOTE: `sum` here is pyspark.sql.functions.sum (imported at file
            # level, shadowing the builtin), i.e. a column aggregate.
            country_stats = merged_df.groupBy("country") \
                .agg(sum("critical").alias("total_critical_cases"))

            # Ascending sort puts NULL first by default; push NULL totals last
            # so a country with no usable data is not reported as the minimum.
            row = country_stats \
                .orderBy(col("total_critical_cases").asc_nulls_last()) \
                .select("country") \
                .first()
        finally:
            # Stop the session even when a Spark call above raised.
            spark.stop()

        if row is None:
            raise LookupError("CSV files contain no rows")
        return row[0]

    def _send_json(self, status, payload):
        """Write ``payload`` as a JSON response with the given HTTP status."""
        body = json.dumps(payload).encode()
        self.send_response(status)
        self.send_header('Content-type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

def run_server(port=8008):
    """Serve HTTPRequestHandler on all interfaces at ``port`` until interrupted.

    Args:
        port: TCP port to listen on.

    The call blocks in ``serve_forever()``. Whatever ends it (for example a
    ``KeyboardInterrupt``) still propagates to the caller, but the listening
    socket is closed first so the port is released for the next start.
    """
    server_address = ('', port)
    # The server object is a context manager; leaving the block calls
    # server_close(), which matters in a long-lived notebook kernel where an
    # unclosed socket would keep the port bound.
    with HTTPServer(server_address, HTTPRequestHandler) as httpd:
        print(f'Starting server on port {port}...')
        httpd.serve_forever()

# Entry point: start the server with run_server()'s default port, but only
# when this module is the one being executed (not when it is imported).
if __name__ == '__main__':
    run_server()

Starting server on port 8008...


127.0.0.1 - - [29/Mar/2024 12:00:10] "GET /most_suffering_country HTTP/1.1" 404 -
127.0.0.1 - - [29/Mar/2024 12:00:52] "GET /most_suffering_country HTTP/1.1" 404 -
127.0.0.1 - - [29/Mar/2024 12:02:25] "GET /most_suffering_country HTTP/1.1" 404 -
127.0.0.1 - - [29/Mar/2024 12:03:37] "GET /most_suffering_country HTTP/1.1" 404 -
127.0.0.1 - - [29/Mar/2024 12:06:18] "GET /most_suffering_country HTTP/1.1" 404 -
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/03/29 12:08:57 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
24/03/29 12:08:57 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
24/03/29 12:08:57 WARN Utils: Service 'SparkUI' could not bind on port 4041. Attempting port 4042.
127.0.0.1 - - [29/Mar/2024 12:09:03] "GET /least_suffering_country HTTP/1.1" 200 -
24/03/29 12:09:50 WARN Utils: Servic