# In [9]:
import pandas as pd


class MovieComparison:
    """Load rating and movie-title files and compare two movies.

    The comparison looks only at ratings from the six months preceding the
    most recent rating date found in the loaded data.
    """

    # Column names assigned to the header-less rating files, in file order.
    _RATING_COLUMNS = ["movie_id", "user_id", "date", "rating"]
    # Column order of the frame built by ``load_movies``.
    _MOVIE_COLUMNS = ["movie_id", "movie_name", "movie_year"]

    def __init__(self, rating_paths, movies_path):
        """Store the input locations; nothing is read until ``load_*`` runs.

        Args:
            rating_paths: Iterable of paths to comma-separated rating files.
            movies_path: Path to the comma-separated movie titles file.
        """
        self.rating_paths = rating_paths
        self.movies_path = movies_path
        # Populated by load_ratings(): one row per rating.
        self.ratings = None
        # Populated by load_movies(): one row per valid title line.
        self.movies = None

    def _read_rating_file(self, path):
        """Read one header-less rating file and parse its ``date`` column."""
        frame = pd.read_csv(
            path, delimiter=",", header=None, names=self._RATING_COLUMNS
        )
        return frame.assign(
            date=pd.to_datetime(frame["date"], format="%Y-%m-%d")
        )

    def load_ratings(self) -> None:
        """Read every file in ``rating_paths`` into ``self.ratings``.

        The files are concatenated in the given order under a fresh
        0..n-1 index. Errors from pandas (missing file, a date that does
        not match ``%Y-%m-%d``, an empty ``rating_paths``) propagate.
        """
        self.ratings = pd.concat(
            [self._read_rating_file(path) for path in self.rating_paths],
            ignore_index=True,
        )

    @staticmethod
    def _is_missing(value: str) -> bool:
        """Return True when *value* is blank or the literal ``NULL``."""
        text = value.strip()
        return not text or text.upper() == "NULL"

    def load_movies(self) -> None:
        """Read the movie titles file into ``self.movies``.

        Each line is expected to look like ``movie_id,year,title`` where the
        title may itself contain commas. Lines that are blank, have fewer
        than three fields, contain a blank/``NULL`` field, or whose id or
        year is not an integer are skipped.
        """
        movies = []
        with open(self.movies_path, encoding='ISO-8859-1') as file:
            for line in file:
                # maxsplit=2 keeps any commas inside the title intact.
                fields = line.strip().split(",", 2)
                if len(fields) < 3:
                    # Blank or truncated line: nothing usable, and a blind
                    # two-value unpack here would raise and abort the load.
                    continue
                if any(self._is_missing(field) for field in fields):
                    continue

                movie_id, prod_year, movie_name = fields
                try:
                    movie = {
                        "movie_id": int(movie_id),
                        "movie_name": movie_name,
                        "movie_year": int(prod_year),
                    }
                except ValueError:
                    # Non-numeric id or year: treat the row as malformed.
                    continue
                movies.append(movie)

        # Explicit columns keep the schema stable even when no row survived.
        self.movies = pd.DataFrame(movies, columns=self._MOVIE_COLUMNS)

    def compare_movies(self, movie_id_1, movie_id_2):
        """Compare two movies using ratings from the last six months.

        "Last six months" is measured back from the latest ``date`` in
        ``self.ratings``, not from today. ``load_ratings`` must have been
        called first.

        Args:
            movie_id_1: Id of the first movie.
            movie_id_2: Id of the second movie.

        Returns:
            A human-readable sentence. If either movie has no ratings in the
            window, the sentence suggests removing it. Otherwise the movie
            with the higher mean rating is preferred; on equal means the one
            with more ratings is preferred; if those are equal too, the
            movies are reported as equivalent.
        """
        ratings = self.ratings
        latest_date = ratings['date'].max()
        six_months_ago = latest_date - pd.DateOffset(months=6)

        # Apply the date window once, then pick each movie out of it.
        recent = ratings[ratings['date'] >= six_months_ago]
        ratings_movie_1 = recent[recent['movie_id'] == movie_id_1]
        ratings_movie_2 = recent[recent['movie_id'] == movie_id_2]

        if ratings_movie_1.empty and ratings_movie_2.empty:
            return "Both movies have not been watched in the last 6 months. Consider removing both."
        if ratings_movie_1.empty:
            return f"Movie {movie_id_1} has not been watched in the last 6 months. Consider removing it."
        if ratings_movie_2.empty:
            return f"Movie {movie_id_2} has not been watched in the last 6 months. Consider removing it."

        avg_rating_1 = ratings_movie_1['rating'].mean()
        avg_rating_2 = ratings_movie_2['rating'].mean()

        if avg_rating_1 > avg_rating_2:
            return f"Movie {movie_id_1} is preferred over Movie {movie_id_2} based on the average rating."
        if avg_rating_2 > avg_rating_1:
            return f"Movie {movie_id_2} is preferred over Movie {movie_id_1} based on the average rating."

        # Equal averages: fall back to the number of ratings in the window.
        count_1 = len(ratings_movie_1)
        count_2 = len(ratings_movie_2)

        if count_1 > count_2:
            return f"Movie {movie_id_1} is preferred over Movie {movie_id_2} based on the number of views."
        if count_2 > count_1:
            return f"Movie {movie_id_2} is preferred over Movie {movie_id_1} based on the number of views."
        return f"Movies {movie_id_1} and {movie_id_2} are equivalent based on rating and number of views."


# All input files for this run live in one local folder.
data_dir = "C:\\Users\\PC\\Desktop\\turktelekombotcamp"

# rating_1.txt through rating_4.txt, in that order.
rating_paths = [f"{data_dir}\\rating_{part}.txt" for part in range(1, 5)]
movies_path = f"{data_dir}\\movie_titles.csv"

# Build the comparer and load both data sets before querying it.
processor = MovieComparison(rating_paths=rating_paths, movies_path=movies_path)
processor.load_ratings()
processor.load_movies()

# Compare movie 1 against movie 2 and show the verdict.
result = processor.compare_movies(movie_id_1=1, movie_id_2=2)
print(result)

# Output: Movie 1 is preferred over Movie 2 based on the average rating.
