In [0]:
-- This SQL code demonstrates how to test the retrieval of player information
-- using the last name and player ID variables in Databricks.

In [0]:
-- Session variables holding the two lookup inputs used by the cells below.
-- OR REPLACE lets this cell be re-run without a "variable already exists" error.
DECLARE OR REPLACE VARIABLE last_name_to_lookup STRING;
DECLARE OR REPLACE VARIABLE player_id_to_lookup STRING;

In [0]:
-- Load both lookup values from the 'clubroyale' secret scope in one statement,
-- so no credentials are hard-coded in the notebook.
SET VARIABLE
  last_name_to_lookup = secret('clubroyale', 'user1_last_name'),
  player_id_to_lookup = secret('clubroyale', 'user1_rewardnumber');

In [0]:
-- Return both variables as a one-row result; the Python cell below reads them via _sqldf.
SELECT
  last_name_to_lookup,
  player_id_to_lookup;

# Crossing the Language Barrier
Use the `_sqldf` DataFrame to extract the values from the SQL environment and make them available in the Python environment.

In [0]:
%python
display(_sqldf)
last_name_to_lookup = _sqldf.first()["last_name_to_lookup"]
player_id_to_lookup = _sqldf.first()["player_id_to_lookup"]

In [0]:
-- Session variables for the lookup results that are compared later on.
-- NOTE(review): direct_results is not referenced by the other cells shown in this
-- notebook; confirm whether it is still needed.
DECLARE OR REPLACE VARIABLE direct_results STRING;
DECLARE OR REPLACE VARIABLE function_results STRING;

In [0]:
-- Calls '/PlayerLookup.asp' through the 'club_royale' connection with http_request
-- and stores the response text in http_request_results.
-- temp_results receives the http_request result (status code and response text).
DECLARE OR REPLACE VARIABLE temp_results STRUCT<status_code INT, text STRING>;
DECLARE OR REPLACE VARIABLE http_request_results STRING ;
DECLARE OR REPLACE VARIABLE header map<string,string>;
-- payload is initialised with a JSON string built from the two lookup values.
DECLARE OR REPLACE VARIABLE payload = to_json(
  named_struct(
    'tbxLNameLookup',
    last_name_to_lookup,
    'tbxPlayerLookup',
    player_id_to_lookup
  )
);
-- NOTE(review): payload was just declared from a to_json(...) string expression, yet
-- it is reassigned here with a MAP of the same two fields. Confirm which form the
-- `json` argument of http_request should receive and remove the other assignment.
SET VAR payload = map("tbxLNameLookup",last_name_to_lookup,"tbxPlayerLookup",player_id_to_lookup);
-- Empty map: no additional request headers are supplied.
SET VAR header = map();
SET VAR temp_results = (
    http_request(
      conn => 'club_royale',
      method => 'POST',
      path => '/PlayerLookup.asp',
      json => payload,
      headers =>header
    )
  );

-- NOTE(review): both CASE branches return temp_results.text, so the status code
-- currently has no effect on the stored value. Presumably a placeholder for
-- error handling; confirm the intended non-200 behaviour.
SET VAR http_request_results = CASE
  WHEN temp_results.status_code = 200 THEN temp_results.text
  ELSE temp_results.text
END;
-- Last statement of the cell: its result is what the following Python cell reads via _sqldf.
select http_request_results;

In [0]:
%python
http_request_results = _sqldf.first()["http_request_results"]
displayHTML(http_request_results)

In [0]:
-- Run the same lookup through the catalog function and keep its result for comparison.
SET VARIABLE function_results = clubroyale.offers.GetClubRoyalePlayerInfoDirect(
  player_id_to_lookup,
  last_name_to_lookup
)

In [0]:
-- Show the value returned by the catalog function.
SELECT function_results;

# Compare using Levenshtein 
https://docs.databricks.com/aws/en/sql/language-manual/functions/levenshtein

The Levenshtein distance is a metric for measuring the difference between two strings by counting the minimum number of single-character edits (insertions, deletions, or substitutions) required to transform one string into the other. Databricks provides built-in support for calculating this distance in both SQL and PySpark.


In [0]:
-- Edit distance between the two result strings, returned together with both
-- strings so the next Python cell can inspect them via _sqldf.
SELECT
  levenshtein(function_results, http_request_results) AS difference,
  function_results,
  http_request_results

# Compare Strings
This cell shows how to pass values across the language barrier by using DataFrames, namely the built-in `_sqldf` DataFrame.

In Databricks, `_sqldf` is an implicit variable that holds the result of the most recently executed SQL cell as a Spark DataFrame. It allows you to seamlessly switch between SQL and Python: the result of the SQL query can be accessed and manipulated in Python without re-running the query.

The code compares the strings by turning them into lists (created by splitting each large string on whitespace into a list of tokens) and then uses the `Differ.compare` function from `difflib`.


In [0]:
%python
# Display the results of the previous SQL query as a DataFrame
display(_sqldf)
from difflib import Differ
# Extract the first row from the DataFrame
row = _sqldf.first()

# Retrieve the 'function_results' and 'direct_results' columns as strings
string1 = row['function_results']
string2 = row['http_request_results']
# Retrieve the 'difference' column, which contains the Levenshtein distance
levenshtein = row['difference']
# Initialize a Differ object to compare the two strings
differ = Differ()

# Compute the line-by-line difference between the two strings
diff = list(differ.compare(string1.split(), string2.split()))

# Print the differences between the two strings
print("\n".join(diff))
if levenshtein < 80:
  print("Probably the same page, with auto-generated content changing (timestamp).")

# Note: this is not a great solutions, but it is a start


# HTML Differences

In [0]:
%python
# Import HtmlDiff from difflib to generate an HTML side-by-side diff
from difflib import HtmlDiff # https://docs.python.org/3/library/difflib.html#difflib.HtmlDiff

# Assign the two HTML/text strings to compare
html1 = string1
html2 = string2
context = True # If True, shows only the lines around the differences. Defaults to False (shows all lines).
numlines = 2 # (Optional) Number of context lines to show if context=True. Defaults to 5.

# Generate an HTML file showing the differences between the two strings, line by line
html_diff = HtmlDiff().make_file(
    html1.splitlines(),  # Split the first string into lines
    html2.splitlines(),  # Split the second string into lines
    fromdesc="function_results",  # Description for the first version
    todesc="direct_results",     # Description for the second version
    context = context, # If True, shows only the lines around the differences. Defaults to False (shows all lines).
    numlines = numlines # (Optional) Number of context lines to show if context=True. Defaults to 5.
)

# Render the HTML diff output in the Databricks notebook
displayHTML(html_diff)

# Test with Requests


In [0]:
%python
import requests
url = f"https://www.clubroyaleoffers.com/PlayerLookup.asp"

payload = {
    "tbxLNameLookup": last_name_to_lookup,
    "tbxPlayerLookup": player_id_to_lookup
}
response = requests.post(url, data=payload)
print(response)
print(response.text)

In [0]:
%python
from pyspark.sql import Row

df = spark.createDataFrame([Row(response_text=response.text)])
df.createOrReplaceTempView("requests_results") #do this so we can easily compute differences
display(df)

In [0]:
-- Show the response captured by the Python `requests` call.
SELECT * FROM requests_results;
-- Overwrite http_request_results with that response so it can be compared
-- against function_results below.
SET VARIABLE http_request_results = (
  SELECT response_text FROM requests_results LIMIT 1
);
SELECT
  levenshtein(function_results, http_request_results) AS difference,
  function_results,
  http_request_results;

In [0]:
%python
# Render the body returned by the `requests` call as HTML in the notebook.
displayHTML(response.text)

In [0]:
%python
# Replace the second comparison string with the body returned by the `requests` call,
# so the diff cell below compares function_results against it.
string2 = response.text

In [0]:
%python
# Render the second comparison string (now the `requests` response body) as HTML.
displayHTML(string2)

In [0]:
%python
# Import HtmlDiff from difflib to generate an HTML side-by-side diff
from difflib import HtmlDiff # https://docs.python.org/3/library/difflib.html#difflib.HtmlDiff

# Assign the two HTML/text strings to compare
html1 = string1
html2 = string2
context = True # If True, shows only the lines around the differences. Defaults to False (shows all lines).
numlines = 2 # (Optional) Number of context lines to show if context=True. Defaults to 5.

# Generate an HTML file showing the differences between the two strings, line by line
html_diff = HtmlDiff().make_file(
    html1.splitlines(),  # Split the first string into lines
    html2.splitlines(),  # Split the second string into lines
    fromdesc="function_results",  # Description for the first version
    todesc="direct_results",     # Description for the second version
    context = context, # If True, shows only the lines around the differences. Defaults to False (shows all lines).
    numlines = numlines # (Optional) Number of context lines to show if context=True. Defaults to 5.
)

# Render the HTML diff output in the Databricks notebook
displayHTML(html_diff)

In [0]:
%python 
# Render the first comparison string (the function_results value) as HTML.
displayHTML(string1)