# This is a script to run automated crawling

### Import necessary libraries

In [1]:
import json
import os
import subprocess
import warnings

import matplotlib.pyplot as plt
import pandas as pd

### Set plotting params

In [2]:
# Notebook-wide matplotlib defaults: figure size and base font size.
for _rc_key, _rc_value in (('figure.figsize', (10, 6)), ('font.size', 14)):
    plt.rcParams[_rc_key] = _rc_value

### Define needed constants

In [28]:
# Connection and launcher settings for the crawler.
# Every value may be overridden through the environment variable named in
# the lookup; the literal is only the fallback, so running without any of
# these variables set behaves exactly as before.
JAR_FILE_PATH = os.environ.get(
    'WALKER_JAR_FILE_PATH',
    '/home/gorq/Desktop/vwm-final/VWM/walker_2.0/out/artifacts/walker_2_0_jar/walker_2.0.jar',
)
DB_URL = os.environ.get('WALKER_DB_URL', 'localhost:3306')
DB_NAME = os.environ.get('WALKER_DB_NAME', 'market')
USERNAME = os.environ.get('WALKER_DB_USERNAME', 'gorq')
# NOTE(review): the fallback keeps a credential in the source; prefer setting
# WALKER_DB_PASSWORD and removing the literal once all users have migrated.
PASSWORD = os.environ.get('WALKER_DB_PASSWORD', 'hesloheslo')

### Define an experiment class

In [26]:
class Experiment:
    """Read-only view over the statistics produced by one crawler run.

    Wraps the dictionary parsed from the statistics JSON together with the
    path of the products JSON file. Each property is a plain lookup into
    that dictionary; nothing is validated or cached.
    """

    def __init__(self, dct: dict, prod_json: str) -> None:
        """Keep the parsed statistics and the products-file path."""
        self._prod_json = prod_json
        self._dct = dct

    @property
    def timeout(self) -> int:
        """Value stored under ``'timeout'`` (printed as seconds by ``__str__``)."""
        stats = self._dct
        return stats['timeout']

    @property
    def request_limit(self) -> int:
        """Value stored under ``'request limit'``."""
        stats = self._dct
        return stats['request limit']

    @property
    def ordering(self) -> str:
        """Value stored under ``'ordering'``."""
        stats = self._dct
        return stats['ordering']

    @property
    def average_depth(self) -> float:
        """Value stored under ``'average depth'``."""
        stats = self._dct
        return stats['average depth']

    @property
    def average_branching(self) -> float:
        """Value stored under ``'average branching'``."""
        stats = self._dct
        return stats['average branching']

    @property
    def C(self) -> float:
        """Value stored under ``'walker' -> 'C'``."""
        walker_stats = self._dct['walker']
        return walker_stats['C']

    @property
    def acquired_amount(self) -> int:
        """Value stored under ``'walker' -> 'acquired amount'``."""
        walker_stats = self._dct['walker']
        return walker_stats['acquired amount']

    @property
    def unaccepted_amount(self) -> int:
        """Value stored under the top-level ``'not accepted'`` key."""
        stats = self._dct
        return stats['not accepted']

    @property
    def report_frequency(self) -> float:
        """Value stored under ``'reporter' -> 'frequency'``.

        ``graph`` multiplies it by the sample index to obtain elapsed
        seconds, i.e. it is used as the time between two reports.
        """
        reporter_stats = self._dct['reporter']
        return reporter_stats['frequency']

    @property
    def progress_list(self) -> list:
        """Value stored under ``'reporter' -> 'acquired times'``."""
        reporter_stats = self._dct['reporter']
        return reporter_stats['acquired times']

    @property
    def products_df(self) -> pd.DataFrame:
        """Load the products JSON file into a DataFrame (re-read on every access)."""
        source = self._prod_json
        return pd.read_json(source, orient='records')

    def toPSV(self) -> str:
        """Return the experiment as a single pipe-separated record."""
        columns = (
            self.timeout,
            self.request_limit,
            self.ordering,
            self.C,
            self.average_depth,
            self.average_branching,
            self.report_frequency,
            self.progress_list,
            self.acquired_amount,
            self.unaccepted_amount,
        )
        # Formatting through an f-string keeps the exact rendering of each value.
        return '|'.join(f'{column}' for column in columns)

    def graph(self) -> None:
        """Plot the acquired amount against elapsed time and show the figure.

        Note that the figure size and font size are written to the global
        ``plt.rcParams`` and therefore stay in effect after the call.
        """
        for rc_key, rc_value in (('figure.figsize', (10, 7)), ('font.size', 15)):
            plt.rcParams[rc_key] = rc_value

        plt.ylabel('Acquired amount')
        plt.xlabel('Time passed (seconds)')
        plt.grid(True)

        period = self.report_frequency
        acquired = self.progress_list
        # The curve starts at the origin; sample k is placed at k * period.
        elapsed = [0]
        elapsed.extend(period * tick for tick in range(1, len(acquired) + 1))
        plt.plot(elapsed, [0] + acquired, label='Progress')

        plt.legend()
        plt.show()

    def outputInfo(self) -> None:
        """Print the textual summary, then draw the progress graph."""
        print(str(self))
        self.graph()

    def __str__(self) -> str:
        """Return a framed, multi-line summary of the experiment."""
        rule = '--------------------------------------------------------'
        rows = [
            '',
            rule,
            f'| Timeout: {self.timeout} seconds',
            f'| Request limit: {self.request_limit}',
            f'| Ordering: {self.ordering}',
            f'| C: {self.C}',
            f'| Average depth: {self.average_depth}',
            f'| Average branching: {self.average_branching}',
            f'| Report frequency: {round(1/self.report_frequency, 3)} updates per second',
            f'| Progress list sample: {self.progress_list[:5]}...',
            f'| Acquired amount: {self.acquired_amount}',
            f'| Not accepted amount: {self.unaccepted_amount}',
            rule,
            '',
        ]
        return '\n'.join(rows)

### Create a function to run an experiment

In [19]:
def run_experiment(
        db_url: str,
        db_name: str,
        username: str,
        password: str,
        *_,
        timeout: int = 10,
        request_limit: int = 1,
        C: int = 1,
        frequency: float = 1,
        stat_file: str,
        products_file: str,
        ordering: str = 'fixed',
        wanted: int
    ) -> Experiment:
    """Run the crawler JAR once and load the statistics it wrote.

    The JAR at ``JAR_FILE_PATH`` is launched through ``java -jar`` and this
    function blocks until it exits. Its standard output is captured (and
    discarded) so it does not clutter the notebook.

    Args:
        db_url: Forwarded as ``--db-url``.
        db_name: Forwarded as ``--db-name``.
        username: Forwarded as ``--username``.
        password: Forwarded as ``--password``.
        *_: Extra positional arguments are accepted and ignored.
        timeout: Forwarded as ``--timeout``.
        request_limit: Forwarded as ``--request-limit``.
        C: Forwarded as ``--set-c``.
        frequency: Forwarded as ``--report-frequency``.
        stat_file: Forwarded as ``--output-stats``; read back as JSON
            once the process has finished.
        products_file: Forwarded as ``--get-products``; handed to the
            returned ``Experiment`` as its products-file path.
        ordering: Forwarded as ``--ordering``.
        wanted: Forwarded as ``--wanted-sample``.

    Returns:
        An ``Experiment`` built from the parsed ``stat_file``.

    Raises:
        FileNotFoundError: If ``java`` cannot be launched or ``stat_file``
            does not exist after the run.
        json.JSONDecodeError: If ``stat_file`` is not valid JSON.
    """
    options = {
        '--db-url': db_url,
        '--db-name': db_name,
        '--username': username,
        '--password': password,
        '--timeout': timeout,
        '--request-limit': request_limit,
        '--wanted-sample': wanted,
        '--set-c': C,
        '--report-frequency': frequency,
        '--output-stats': stat_file,
        '--get-products': products_file,
        '--ordering': ordering,
    }

    # Build the argument vector directly instead of splitting a formatted
    # string: values containing whitespace (paths, passwords) stay a single
    # argument and empty values are not silently dropped.
    command = ['java', '-jar', JAR_FILE_PATH]
    for flag, value in options.items():
        command += [flag, str(value)]

    completed = subprocess.run(command, stdout=subprocess.PIPE, check=False)
    if completed.returncode != 0:
        # NOTE(review): the JAR's exit-code convention is not visible here, so
        # a failure is surfaced as a warning rather than an exception. After a
        # failed run the statistics file may be missing or left over from an
        # earlier experiment.
        warnings.warn(
            f'crawler process exited with status {completed.returncode}; '
            f'{stat_file!r} may be missing or stale',
            RuntimeWarning,
            stacklevel=2,
        )

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(stat_file, 'r', encoding='utf-8') as f:
        return Experiment(json.load(f), products_file)

### Run the experiment itself

In [29]:
# Launch a single crawl with the configured database settings and keep the
# resulting Experiment for inspection in the cells that follow.
exp = run_experiment(
    db_url=DB_URL,
    db_name=DB_NAME,
    username=USERNAME,
    password=PASSWORD,
    wanted=1000,
    timeout=5,
    C=2,
    stat_file='test.json',
    products_file='test2.json',
)