# Task 1: 

In [128]:
import requests
import math
import pprint as pp


def get_basic_list_of_movies_for_title(movie_title, number_of_pages = 1): #if pages is not specified, assume 1
    '''Search the OMDb API for movies matching a title and return the basic search entries.

    movie_title: text to search for (sent as the `s` query parameter).
    number_of_pages: maximum number of result pages to request (10 results per page).

    Returns a list of the dictionaries found under the 'Search' key of each
    response page. Returns an empty list when the search yields no results.
    '''

    # NOTE(review): the API key is hard-coded; consider loading it from configuration.
    api_key = "dd14dc5f"
    result_type = "movie"
    results_per_page = 10
    year = ""

    movies_basic_info = []
    for page in range(1, number_of_pages + 1):
        # Passing the values as params lets requests URL-encode them, so titles
        # containing characters such as '&' or '#' cannot corrupt the query string.
        query_params = {"s": movie_title, "page": page, "apikey": api_key,
                        "type": result_type, "y": year}
        result_dict = requests.request("GET", "http://www.omdbapi.com/", params=query_params).json()

        # An unsuccessful search carries no 'Search' list; stop instead of raising KeyError.
        if result_dict.get('Response') != "True":
            break

        movies_basic_info.extend(result_dict.get('Search', []))
        total_results = int(result_dict.get('totalResults', 0))
        last_page = math.ceil(total_results / results_per_page) # ceil means round UP
        if last_page <= page:
            break # if reached last page, stop looping

    return movies_basic_info

In [129]:
# The one-page search is fetched once and reused, so the checks below do not
# repeat the same API request.
first_page_movies = get_basic_list_of_movies_for_title("star wars")
assert len(first_page_movies) == 10
assert "Star Wars" in first_page_movies[0]['Title']
assert len(get_basic_list_of_movies_for_title("star wars", 2)) > 10
print("tests passed")

tests passed


In [130]:
def get_details_of_movie_with_id(movie_id): # if movie does not exist, return None
    '''Fetch the detailed record for one movie id from the OMDb API.

    Returns the decoded JSON dictionary when its 'Response' field equals "True",
    otherwise returns None.
    '''
    api_key = "dd14dc5f"
    url = f"http://www.omdbapi.com/?i={movie_id}&apikey={api_key}"
    movie_record = requests.request("GET", url).json()
    if movie_record['Response'] == "True":
        return movie_record
    return None

In [131]:
# Fetch the known movie once and check both fields on the same response.
star_wars_details = get_details_of_movie_with_id("tt0076759")
assert star_wars_details['Response'] == "True"
assert star_wars_details['Title'] == "Star Wars: Episode IV - A New Hope"
# An id that does not exist must yield None (identity check, not equality).
assert get_details_of_movie_with_id("bananas") is None
print("tests passed")

tests passed


In [132]:
def get_detailed_info_of_movies_for_title(movie_title, number_of_pages = 1): #if pages is not specified, assume 1
    '''Return detailed records for every movie found by a title search.

    Each search hit is looked up by its 'imdbID'; hits whose detail lookup
    returns None are left out of the result.
    '''
    search_hits = get_basic_list_of_movies_for_title(movie_title, number_of_pages)
    # Lazy generator: the lookups still run one by one, in search order.
    looked_up = (get_details_of_movie_with_id(hit['imdbID']) for hit in search_hits)
    return [details for details in looked_up if details is not None]

# Each detailed lookup issues one request per movie, so the one-page result is
# fetched once and reused by the checks that inspect it.
first_page_details = get_detailed_info_of_movies_for_title("star wars")
assert len(first_page_details) == 10
assert "Star Wars" in first_page_details[0]['Title']
assert first_page_details[0]['Director'] is not None
assert len(get_detailed_info_of_movies_for_title("star wars", 2)) > 10
print("tests passed")

tests passed


In [133]:
def movie_detail_as_string(movie):
    '''Format one movie dictionary as a 90-character-wide, asterisk-framed text box.

    movie: dictionary with the keys 'Title', 'Year', 'Rated', 'Country',
           'Director', 'Actors' and 'Plot'.

    Returns four lines followed by a blank line: the centred title line, the
    director/actors line, the plot line and a closing row of asterisks.
    '''
    basic_info = f"{movie['Title']} ({movie['Year']}, {movie['Rated']}, {movie['Country']})"
    # No closing parenthesis here: the details text never opens one, so the old
    # trailing ")" showed up as a stray character whenever the line was short.
    details = f"Director: {movie['Director']}, Actors: {movie['Actors']}"
    plot = f"Plot: {movie['Plot']}"
    # details[0:86] and plot[0:86] trim the text to the 86 characters that fit inside the frame
    return f"{basic_info:*^90}\n* {details[0:86]:*^86} *\n* {plot[0:86]:*^86} *\n{'':*^90}\n\n"


In [134]:
# Expected box for the example movie below: the title line is centred in 90
# characters and the director/actors and plot lines are cut to 86 characters.
expected_output = "********************Star Wars: Episode IV - A New Hope (1977, PG, USA)********************\n* Director: George Lucas, Actors: Mark Hamill, Harrison Ford, Carrie Fisher, Peter Cushi *\n* Plot: Luke Skywalker joins forces with a Jedi Knight, a cocky pilot, a Wookiee and two *\n******************************************************************************************\n\n"
# Mock movie record, so this check does not depend on API results.
example_movie = {'Title': 'Star Wars: Episode IV - A New Hope',
 'Year': '1977',
 'Rated': 'PG',
 'Director': 'George Lucas',
 'Actors': 'Mark Hamill, Harrison Ford, Carrie Fisher, Peter Cushing',
 'Plot': "Luke Skywalker joins forces with a Jedi Knight, a cocky pilot, a Wookiee and two droids to save the galaxy from the Empire's world-destroying battle station, while also attempting to rescue Princess Leia from the mysterious Darth Vader.",
 'Country': 'USA'}
assert movie_detail_as_string(example_movie) == expected_output
print("tests passed")

tests passed


In [135]:
def translate_rating_into_subgroup(rating_string):
    '''Map a rating given as text onto a named quality subgroup.

    Returns "Mediocre" for ratings below 7.0, "Good" for ratings from 7.0 up to
    (but not including) 8.0, and "Amazing" for 8.0 and above.
    Returns "Unrated" when the text cannot be read as a number (for example a
    placeholder such as "N/A"), so one unrated movie does not abort a whole report.
    '''
    try:
        rating_as_number = float(rating_string)
    except (TypeError, ValueError):
        return "Unrated"

    if rating_as_number < 7.0:
        return "Mediocre"
    elif rating_as_number < 8.0:
        return "Good"
    else:
        return "Amazing"

# Table of (rating text, expected subgroup) pairs, including both boundary values.
rating_cases = [
    ("3.3", "Mediocre"),
    ("7.0", "Good"),
    ("7.6", "Good"),
    ("8.0", "Amazing"),
]
for rating_text, expected_group in rating_cases:
    assert translate_rating_into_subgroup(rating_text) == expected_group

In [136]:
def add_rating_group_to_details(all_movies):
    '''Add a "Subgroup" entry to every movie dictionary, derived from its 'imdbRating'.

    The dictionaries are modified in place; the same list is also returned.
    '''
    for entry in all_movies:
        rating_text = entry['imdbRating']
        entry["Subgroup"] = translate_rating_into_subgroup(rating_text)
    return all_movies


# Mock data: (title, rating text, subgroup expected for that rating).
rating_fixtures = [
    ("Bananas", "3.3", "Mediocre"),
    ("Bananas2", "7.3", "Good"),
    ("Bananas3", "8.3", "Amazing"),
]
test_movies = [{"Title": title, "imdbRating": rating}
               for title, rating, _ in rating_fixtures]
test_movies_expected = [{"Title": title, "imdbRating": rating, "Subgroup": group}
                        for title, rating, group in rating_fixtures]

# The function updates the dictionaries in place, so the input list itself is compared.
add_rating_group_to_details(test_movies)
assert test_movies == test_movies_expected
print("tests passed")

tests passed


In [137]:
def movies_in_a_sub_group(all_movies, subgroup):
    '''Return, in their original order, the movies whose 'Subgroup' equals subgroup.'''
    selected = []
    for candidate in all_movies:
        if candidate['Subgroup'] == subgroup:
            selected.append(candidate)
    return selected

# Mock data: one mediocre movie and two amazing ones.
test_movies = [
    {"Title": "Bananas", "Subgroup": "Mediocre"},
    {"Title": "Bananas2", "Subgroup": "Amazing"},
    {"Title": "Bananas3", "Subgroup": "Amazing"},
]
mediocre_only = movies_in_a_sub_group(test_movies, "Mediocre")
assert mediocre_only == [{"Title":"Bananas", "Subgroup": "Mediocre"}]
print("tests passed")

tests passed


In [138]:
def string_length_to_number_of_minutes(string_length):
    '''Convert a runtime text such as "123 min" into a whole number of minutes.

    Returns 0 when the text does not contain " min" or when the part in front
    of " min" is not a whole number (for example "N/A min" or "1 h 30 min").
    '''
    # partition splits on the literal " min"; the previous rstrip(' min') removed
    # any trailing run of the characters ' ', 'm', 'i', 'n' rather than that suffix.
    minutes_text, separator, _ = string_length.partition(" min")
    if not separator:
        return 0
    try:
        return int(minutes_text)
    except ValueError:
        # Malformed runtime: report 0 like other unrecognised formats instead of crashing.
        return 0

# Runtime text -> expected minutes; text without " min" must give 0.
runtime_cases = {"123 min": 123, "53 min": 53, "bananas": 0}
for runtime_text, expected_minutes in runtime_cases.items():
    assert string_length_to_number_of_minutes(runtime_text) == expected_minutes
print("tests passed")

tests passed


In [139]:
def average_length_of_a_movies(some_movies):
    '''Return the average runtime in minutes, truncated to an int, of the given movies.

    Each movie's 'Runtime' text is converted with string_length_to_number_of_minutes.
    Returns 0 for an empty list.
    '''
    minutes_per_movie = [string_length_to_number_of_minutes(film['Runtime'])
                         for film in some_movies]
    if len(minutes_per_movie) == 0:
        return 0
    return int(sum(minutes_per_movie) / len(minutes_per_movie))
    
# Mock data: three runtimes whose mean is exactly 100 minutes.
test_movies = [{"Title": title, "Runtime": runtime}
               for title, runtime in [("Bananas", "105 min"),
                                      ("Bananas2", "100 min"),
                                      ("Bananas3", "95 min")]]

assert average_length_of_a_movies(test_movies) == 100

# An empty list must give 0 rather than dividing by zero.
test_movies_empty = []
assert average_length_of_a_movies(test_movies_empty) == 0
print("tests passed")

tests passed


In [140]:
def prepare_report_for_movies_with_title(movie_title):
    '''Build a text report of the movies found for a title, grouped by rating subgroup.

    For each of "Mediocre", "Good" and "Amazing" the report has a heading with
    the subgroup's average length, followed by one formatted box per movie.
    '''
    movies = get_detailed_info_of_movies_for_title(movie_title)
    add_rating_group_to_details(movies)

    # Collect the pieces and join once at the end.
    pieces = [f"Top 10 movies titled {movie_title}\n\n"]
    for subcategory in ("Mediocre", "Good", "Amazing"):
        members = movies_in_a_sub_group(movies, subcategory)
        average_length = average_length_of_a_movies(members)
        pieces.append(f"Movies which are {subcategory} (average length {average_length} min)\n\n")
        pieces.extend(movie_detail_as_string(member) for member in members)
        pieces.append("\n\n")
    return "".join(pieces)

In [141]:
# Build the grouped report for the "batman" search and print it.
report = prepare_report_for_movies_with_title("batman")
print(report)

Top 10 movies titled batman

Movies which are Mediocre (average length 118 min)

******************Batman v Superman: Dawn of Justice (2016, PG-13, USA)*******************
* Director: Zack Snyder, Actors: Ben Affleck, Henry Cavill, Amy Adams, Jesse Eisenberg)* *
* Plot: Fearing that the actions of Superman are left unchecked, Batman takes on the Man *
******************************************************************************************

**************************Batman Forever (1995, PG-13, USA, UK)***************************
* Director: Joel Schumacher, Actors: Val Kilmer, Tommy Lee Jones, Jim Carrey, Nicole Kid *
* Plot: Batman must battle former district attorney Harvey Dent, who is now Two-Face and *
******************************************************************************************

**************************Batman & Robin (1997, PG-13, USA, UK)***************************
* Director: Joel Schumacher, Actors: Arnold Schwarzenegger, George Clooney, Chris O'Donn *
* Plot:

In [142]:
def are_shorter_movies_better_simple_version(movie_title):
    '''Tell whether average length does not increase from "Mediocre" to "Good" to "Amazing".

    Returns True when the Mediocre average is at least the Good average and the
    Good average is at least the Amazing average, for the movies found for movie_title.
    '''
    movies = get_detailed_info_of_movies_for_title(movie_title)
    add_rating_group_to_details(movies)

    mediocre_avg, good_avg, amazing_avg = (
        average_length_of_a_movies(movies_in_a_sub_group(movies, label))
        for label in ("Mediocre", "Good", "Amazing")
    )
    return mediocre_avg >= good_avg >= amazing_avg

# NOTE(review): both checks go through the live API, so their outcome depends
# on the data returned at the time of running.
assert are_shorter_movies_better_simple_version("star wars") == False
assert are_shorter_movies_better_simple_version("batman") == True

In [143]:
# Print the verdict for one title as a sentence.
title = 'star wars'
print(f"It is {are_shorter_movies_better_simple_version(title)} that shorter {title} movies are always better")

It is False that shorter star wars movies are always better


# Report 1

### Problem: What problem you are solving and how you will solve it with data.

We have access to the api with movie information, and would like to show details of movies. Details should appear in a clear and easy to read way. The movies should be grouped by their rating, assuming 3 basic groups (mediocre, good and amazing). We'd also like to show the average lengths of movies in these groups, and state if longer movies have better ratings.

###  Analysis & Results: What analysis you performed in your python code and what were the results

We wrote a number of functions, together with their tests, that acquire and prepare the data. We also wrote functions that prepare the report and present it. Groups are based on IMDb rating. We separated the tasks of sub-grouping movies and calculating the average length of a sub-group to make our code more robust and testable. We also attempted to write tests that depend on mock data rather than on API results.

### Solution: What is the solution to the problem, and how your analysis helped you to solve it
Movie details of 10 Star Wars movies were printed on the screen. Actors and plots were trimmed for uniformity, but this could be further expanded. It turned out that there is no simple connection between rating categories and length for Star Wars movies (but curiously, shorter Batman movies were better!). In some places we tried to check for incorrect data, so that our code can handle wrong runtime formatting and ignores movies with no detailed info.

# Dataset 2 ( Task 2 ): Weather forecast

website: https://www.weatherapi.com/

Use the Weather forecast API described below to look at current and predicted weather. Note that it includes information about the location, the weather now, and the forecast for the next few days. Look at what data are available about each location and find a business problem you can solve with it.

Tasks: in Task 2 solve ONE of the suggested problems.

Each task should have its own mini-report and its own code, but it is allowed to share some code between tasks, if needed.

Below you have some examples of using the api

I am giving you a temporary api key "2aee514fa2e3493e8b5100342202610" - you can use it a bit, but please go to the above website and create your own. If for some reason you can't get your own, just keep using this temporary one.

# Task 2: Pick one of the suggested problems and solve it:


- Problem 1: A convention of Scottish Star Wars movie fans is preparing for a big outdoor cosplay meeting and is considering three locations: Glasgow, Edinburgh and Falkirk. They would like to know what the weather is like right now in these locations. Print on the screen the weather information about these locations right now in a way that makes it easy to compare and make a decision where the costume party should happen. Can your code suggest the location (e.g. the one which is warmest)? Extra: If the event cannot happen now, which location has the best weather tomorrow or on another day?

- Problem 2: An actors' association is preparing for its annual Halloween photo shoot in Edinburgh Castle, and it has to happen within the next 10 days. It will have to happen at night and outside, so they need you to identify the day when it's least likely to rain in Edinburgh and the moon is brightest. Look at the forecast data and extract the 'daily_chance_of_rain' and 'moon_illumination' values. Then present the information about the next 10 days. Suggest the day when it's least likely to rain and the one when the moon is brightest. Extra: For security reasons the castle needs to close at midnight - assuming the photo shoot needs to happen at least an hour after sunset, and once the moon has risen, how much time would they have each night to perform the photo shoot? (You can look at the 'moonrise' and 'sunset' values.)


In [146]:
import requests
import pprint as pp

def get_weather_for_city(city, days = 3):
    '''Request the forecast for a city from the weatherapi.com forecast endpoint.

    city: location text sent as the `q` query parameter.
    days: number of forecast days to ask for (defaults to 3, the value used so far).

    Returns the decoded JSON response as a dictionary.
    '''
    # NOTE(review): the API key is hard-coded; consider loading it from configuration.
    api_key = "2aee514fa2e3493e8b5100342202610"
    api_url_forecast = "http://api.weatherapi.com/v1/forecast.json"

    # Passing the values as params lets requests URL-encode them, so city names
    # with spaces or special characters cannot corrupt the query string.
    response = requests.request("GET", api_url_forecast,
                                params={"key": api_key, "q": city, "days": days})
    return response.json()

# Fetch the forecast once and check both properties on the same response.
edinburgh_forecast = get_weather_for_city("Edinburgh")
assert edinburgh_forecast['location']['name'] == "Edinburgh"
assert len(edinburgh_forecast['forecast']['forecastday']) == 3

In [None]:
# Define the forecast in this cell so it runs on a fresh kernel.
weather_edinburgh = get_weather_for_city("Edinburgh")
print(f"it's {weather_edinburgh['current']['temp_c']}'C in {weather_edinburgh['location']['name']}")
# forecastday[0] is today's entry, so tomorrow's forecast is at index 1.
print(f"tomorrow it will be {weather_edinburgh['forecast']['forecastday'][1]['day']['avgtemp_c']}'C")

In [189]:
def weather_in_this_many_days(all_forecast, days_from_now = 0):
    '''Return the forecast entry at position days_from_now in the forecast's daily list.'''
    daily_entries = all_forecast['forecast']['forecastday']
    return daily_entries[days_from_now]

In [190]:
import datetime

# Only the datetime module is imported, so date must be reached through it;
# a bare `date.today()` is a NameError on a fresh kernel.
today = datetime.date.today()
# Derived from the same `today` value so both dates stay consistent.
tomorrow = today + datetime.timedelta(days=1)
all_forecast = get_weather_for_city("Edinburgh")

assert weather_in_this_many_days(all_forecast, 0)['date'] == today.strftime("%Y-%m-%d")
assert weather_in_this_many_days(all_forecast, 1)['date'] == tomorrow.strftime("%Y-%m-%d")
print("tests passed")

tests passed


In [206]:
def day_weather_as_string(city_name, day_weather, warmest_temperature = None, lowest_rain_chance = None):
    '''Format one city's daily weather as a single aligned report line ending in a newline.

    day_weather: dictionary whose 'day' entry holds 'avgtemp_c',
                 'daily_chance_of_rain' and 'maxwind_kph'.
    warmest_temperature / lowest_rain_chance: optional reference values; when the
                 city's value equals the reference, the matching icon is appended,
                 and a heart is added when both match.
    '''
    def icon_or_blank(icon, shown):
        # An empty string is later padded to one space by the ':1' format width.
        return icon if shown else ""

    temperature = day_weather['day']['avgtemp_c']
    chance_of_rain = day_weather['day']['daily_chance_of_rain']
    wind = day_weather['day']['maxwind_kph']

    is_warmest = warmest_temperature is not None and temperature == warmest_temperature
    is_lowest_rain = lowest_rain_chance is not None and chance_of_rain == lowest_rain_chance

    #     notice that emojis are just like any other characters
    warmness_icon = icon_or_blank("🔥", is_warmest)
    raininess_icon = icon_or_blank("🌤", is_lowest_rain)
    is_best_icon = icon_or_blank("❤️", is_warmest and is_lowest_rain)

    measurements = f"{city_name:<10} {temperature:>10}'C Temperature{chance_of_rain:>10}% chance of rain{wind:>10} Wind speed"
    icon_column = f" {warmness_icon:1}{raininess_icon:1}{is_best_icon:1}\n"
    return measurements + icon_column

In [211]:
# Expected report lines: without icons, with the "warmest" icon only, and with
# all three icons when the city matches both reference values.
expected_simple = "Edinburgh         6.8'C Temperature        88% chance of rain      18.4 Wind speed    \n"
expected_warm =   "Edinburgh         6.8'C Temperature        88% chance of rain      18.4 Wind speed 🔥  \n"
expected_best =   "Edinburgh         6.8'C Temperature        88% chance of rain      18.4 Wind speed 🔥🌤❤️\n"

# Mock day entry, so this check does not depend on API results.
example_weather = {'day': { 'avgtemp_c': 6.8, 'maxwind_kph': 18.4, 'daily_chance_of_rain': 88} }

assert day_weather_as_string("Edinburgh", example_weather) == expected_simple
# 98 differs from the city's 88% chance of rain, so only the temperature matches here.
assert day_weather_as_string("Edinburgh", example_weather, 6.8, 98 ) == expected_warm
assert day_weather_as_string("Edinburgh", example_weather, 6.8, 88 ) == expected_best

print("tests passed")

tests passed


"Edinburgh         6.8'C Temperature        88% chance of rain      18.4 Wind speed 🔥🌤❤️\n"

In [215]:
def max_temperature(forecasts, days_from_now):
    '''Return the highest 'avgtemp_c' among the given forecasts for the chosen day.'''
    temperatures = (
        weather_in_this_many_days(city_forecast, days_from_now)['day']['avgtemp_c']
        for city_forecast in forecasts
    )
    return max(temperatures)

def lowest_rain_chance(forecasts, days_from_now):
    '''Return the lowest 'daily_chance_of_rain' among the given forecasts for the chosen day.'''
    rain_chances = (
        weather_in_this_many_days(city_forecast, days_from_now)['day']['daily_chance_of_rain']
        for city_forecast in forecasts
    )
    return min(rain_chances)

In [216]:
def weather_report_for_cities(cities):
    '''Build a three-day text report comparing the weather of the given cities.

    For today, tomorrow and the day after, one line is produced per city; the
    day's highest temperature and lowest rain chance are passed to
    day_weather_as_string so matching cities are flagged.
    '''
    # gather data from api: one request per city, reused for all three days
    forecasts = {city: get_weather_for_city(city) for city in cities}

    day_offsets = {"Today": 0, "Tomorrow":1, "Day after tomorrow": 2}
    sections = []
    for day_name, offset in day_offsets.items():
        sections.append(f"Weather for {day_name}\n")

        # calculate extremes across all cities for this day
        temp_max = max_temperature(forecasts.values(), offset)
        rain_lowest = lowest_rain_chance(forecasts.values(), offset)

        # produce one report line per city
        for city, forecast in forecasts.items():
            that_day = weather_in_this_many_days(forecast, offset)
            sections.append(day_weather_as_string(city, that_day, temp_max, rain_lowest))

        sections.append("\n\n")
    return "".join(sections)
    
        
# Print the comparison report for the three candidate locations.
print(weather_report_for_cities(["Edinburgh", "Glasgow", "Falkirk"]))

Weather for Today
Edinburgh         8.1'C Temperature        91% chance of rain      20.2 Wind speed 🔥🌤❤️
Glasgow           7.0'C Temperature        99% chance of rain      17.3 Wind speed    
Falkirk           7.1'C Temperature        94% chance of rain      18.0 Wind speed    


Weather for Tomorrow
Edinburgh         8.4'C Temperature        84% chance of rain      21.2 Wind speed 🔥🌤❤️
Glasgow           7.6'C Temperature        94% chance of rain      28.4 Wind speed    
Falkirk           7.5'C Temperature        91% chance of rain      26.3 Wind speed    


Weather for Day after tomorrow
Edinburgh         7.9'C Temperature        73% chance of rain      37.8 Wind speed 🔥🌤❤️
Glasgow           6.8'C Temperature        86% chance of rain      29.2 Wind speed    
Falkirk           6.9'C Temperature        86% chance of rain      28.8 Wind speed    





# Report 2

### Problem: What problem you are solving and how you will solve it with data.

We have access to the api with weather information for cities which produces forecast for next 3 days. We will print information for 3 Scottish cities. Such report will be created for each of the next 3 days. Report will indicate which city is warmest and which least rainy, for each of days. We will indicate preferences with emoji symbols to enable user to make decisions.

###  Analysis & Results: What analysis you performed in your python code and what were the results

We wrote a number of functions, together with their tests, that acquire and prepare the data. We also wrote functions that prepare the report and present it. We used a separate function to acquire the API data and another to filter it to a particular day, which enabled us to query the API only a small number of times at the beginning, and then just use the data. Additionally, only once all the data is acquired can we find out which city is warmest; that's why separating getting the data from reporting on it was a good call.

### Solution: 
On the day of writing Edinburgh was the warmest and least rainy city of Edinburgh, Glasgow and Falkirk. Emojis make it easier to see which city is most suitable, and additionally, if one city is both warmest and most dry, it is indicated with a heart. The code can handle a situation where two cities are equally warm (both will get an icon).