API and script test with just the first listing data

In [8]:
import pandas as pd
import requests
import json
from datetime import datetime
import numpy as np
import os
from dotenv import load_dotenv

def get_market_value(row):
    """Fetch price statistics for comparable active listings from the Marketcheck search API.

    Args:
        row: Mapping-like record (for example a pandas Series) providing
            'year', 'make', 'model', 'trim' and 'mileage'. 'mileage' may be
            numeric or text such as '99,749 Miles'.

    Returns:
        On HTTP 200, a dict with 'mean_price', 'median_price', 'min_price',
        'max_price' and 'std_dev' (each None when absent from the response)
        plus 'sample_size' (0 when absent). None for any other status code or
        when the request / JSON decoding raises.

    Raises:
        ValueError: If MARKETCHECK_API_KEY is not set in the environment, or
            if the mileage value cannot be parsed as a number.
    """
    import re  # local import: only used to mask the key in the debug output

    load_dotenv()
    API_KEY = os.getenv('MARKETCHECK_API_KEY')
    if not API_KEY:
        raise ValueError("MARKETCHECK_API_KEY not found in environment variables")

    base_url = "https://marketcheck-prod.apigee.net/v2/search/car/active"

    # Clean and convert mileage first
    clean_mileage = float(str(row['mileage']).replace(' Miles', '').replace(',', ''))
    mileage = int(clean_mileage)

    print(f"\nMaking API request for:")
    print(f"Year: {row['year']}")
    print(f"Make: {row['make']}")
    print(f"Model: {row['model']}")
    print(f"Trim: {row['trim']}")
    print(f"Mileage: {mileage}")

    # Search for the same year/make/model/trim within +/- 10,000 miles of the
    # listing, inside a 500-unit radius of the given coordinates.
    params = {
        'api_key': API_KEY,
        'year': row['year'],
        'make': row['make'],
        'model': row['model'],
        'trim': row['trim'],
        'miles': f'{max(0, mileage-10000)}-{mileage+10000}',
        'lat': 40.7608,
        'long': -111.8910,
        'radius': 500,
        'stats': 'price,miles',
        'dealer_type': 'all'
    }

    headers = {
        'Accept': 'application/json',
        'Api-Version': '2.0.0',
    }
    # Only attach the bearer token when a secret is configured; formatting a
    # missing secret would send the literal text "Bearer None".
    api_secret = os.getenv('MARKETCHECK_API_SECRET')
    if api_secret:
        headers['Authorization'] = f'Bearer {api_secret}'

    try:
        print("\nSending API request...")
        response = requests.get(
            base_url,
            params=params,
            headers=headers,
            timeout=30
        )
        print(f"Response status code: {response.status_code}")
        # The final URL contains the api_key query parameter; mask it so the
        # credential is never written into saved notebook output.
        safe_url = re.sub(r'([?&]api_key=)[^&#]*', r'\1<redacted>', str(response.url))
        print(f"Request URL: {safe_url}")

        data = response.json()

        if response.status_code == 200:
            stats = data.get('stats', {})
            price_stats = stats.get('price', {})

            result = {
                'mean_price': price_stats.get('mean', None),
                'median_price': price_stats.get('median', None),
                'min_price': price_stats.get('min', None),
                'max_price': price_stats.get('max', None),
                'std_dev': price_stats.get('std', None),
                'sample_size': stats.get('count', 0)
            }

            print("\nReceived market values:")
            for key, value in result.items():
                print(f"{key}: {value}")

            return result
        else:
            print(f"\nError response: {data}")
            return None

    except Exception as e:
        # Best-effort lookup: report the problem and signal "no data" to the caller.
        print(f"\nException occurred: {e}")
        return None

# Read the scraped listings from disk and report how many rows were loaded
print("Loading CSV file...")
df = pd.read_csv('ksl_cars_detailed.csv')
print(f"Total rows in dataset: {df.shape[0]}")

# Clean price and mileage columns
def clean_price(price):
    """Convert a price string such as '$12,997' to a float.

    Non-string input (already numeric, or missing) is returned unchanged.
    """
    if not isinstance(price, str):
        return price
    without_symbol = price.replace('$', '')
    return float(without_symbol.replace(',', ''))

def clean_mileage(mileage):
    """Convert a mileage string such as '99,749 Miles' to a float.

    Non-string input (already numeric, or missing) is returned unchanged.
    """
    if not isinstance(mileage, str):
        return mileage
    without_unit = mileage.replace(' Miles', '')
    return float(without_unit.replace(',', ''))

# Process only the first row
first_row = df.iloc[0].copy()
print("\nProcessing first row:")
print(first_row[['year', 'make', 'model', 'trim', 'mileage', 'price']])

# Clean the price and mileage for the first row
first_row['clean_price'] = clean_price(first_row['price'])
first_row['clean_mileage'] = clean_mileage(first_row['mileage'])

# Get market value for the first row
print("\nGetting market value...")
market_value = get_market_value(first_row)

# A returned dict can still hold median_price=None (no price stats in the
# response), and a zero median cannot be used as a divisor, so test the
# median itself rather than only the truthiness of the dict.
median_price = market_value.get('median_price') if market_value else None

if median_price:
    # Calculate price differences relative to the market median
    price_diff = first_row['clean_price'] - median_price
    price_diff_pct = (price_diff / median_price) * 100

    print("\nAnalysis Results:")
    print("-" * 50)
    print(f"Listing Price: ${first_row['clean_price']:,.2f}")
    print(f"Market Median: ${median_price:,.2f}")
    print(f"Price Difference: ${price_diff:,.2f}")
    print(f"Price Difference %: {price_diff_pct:.1f}%")

    # Determine deal rating: negative percentages mean the listing is below market
    if price_diff_pct <= -20:
        deal_rating = "Excellent Deal"
    elif price_diff_pct <= -10:
        deal_rating = "Good Deal"
    elif price_diff_pct <= 0:
        deal_rating = "Fair Price"
    elif price_diff_pct <= 10:
        deal_rating = "Slightly High"
    elif price_diff_pct <= 20:
        deal_rating = "High"
    else:
        deal_rating = "Very High"

    print(f"Deal Rating: {deal_rating}")
    print(f"Sample Size: {market_value['sample_size']} comparable vehicles")
else:
    print("\nNo market value data available for this vehicle")

Loading CSV file...
Total rows in dataset: 24

Processing first row:
year                2017
make               Buick
model            Cascada
trim       Sport Touring
mileage     99,749 Miles
price            $12,997
Name: 0, dtype: object

Getting market value...

Making API request for:
Year: 2017
Make: Buick
Model: Cascada
Trim: Sport Touring
Mileage: 99749

Sending API request...
Response status code: 500
Request URL: https://marketcheck-prod.apigee.net/v2/search/car/active?api_key=<REDACTED>&year=2017&make=Buick&model=Cascada&trim=Sport+Touring&miles=89749-109749&lat=40.7608&long=-111.891&radius=500&stats=price%2Cmiles&dealer_type=all

Error response: {'fault': {'faultstring': 'Invalid ApiKey', 'detail': {'errorcode': 'oauth.v2.InvalidApiKey'}}}

No market value data available for this vehicle


New version: rewritten against the Marketcheck price-prediction endpoint (/v2/predict/car/price), following the API docs

In [11]:
import pandas as pd
import requests
import json
from datetime import datetime
import numpy as np
import os
from dotenv import load_dotenv

def get_market_value(row):
    """Request a predicted price for one used vehicle from the Marketcheck price-prediction API.

    Args:
        row: Mapping-like record (for example a pandas Series) providing
            'year', 'make', 'model', 'trim' and 'mileage'. 'make' and 'model'
            must be strings; 'mileage' may be numeric or text such as
            '99,749 Miles'.

    Returns:
        On HTTP 200 with a complete prediction, a dict with 'predicted_price',
        'lower_bound', 'upper_bound' and 'specs'. None when the prediction is
        incomplete, the status code is not 200, or the request / JSON decoding
        raises.

    Raises:
        ValueError: If MARKETCHECK_API_KEY is not set in the environment, or
            if the mileage value cannot be parsed as a number.
    """
    import re  # local import: only used to mask the key in the debug output

    load_dotenv()
    API_KEY = os.getenv('MARKETCHECK_API_KEY')
    if not API_KEY:
        raise ValueError("MARKETCHECK_API_KEY not found in environment variables")

    base_url = "https://mc-api.marketcheck.com/v2/predict/car/price"

    # Clean and convert mileage first
    clean_mileage = float(str(row['mileage']).replace(' Miles', '').replace(',', ''))
    mileage = int(clean_mileage)

    print(f"\nMaking API request for:")
    print(f"Year: {row['year']}")
    print(f"Make: {row['make']}")
    print(f"Model: {row['model']}")
    print(f"Trim: {row['trim']}")
    print(f"Mileage: {mileage}")

    # Parameters for the price prediction API
    params = {
        'api_key': API_KEY,
        'car_type': 'used',
        'year': row['year'],
        'make': row['make'].lower(),  # API expects lowercase
        'model': row['model'].lower(),
        'trim': row['trim'],
        'miles': mileage,
        'latitude': 40.7608,  # Salt Lake City
        'longitude': -111.8910
    }

    try:
        print("\nSending API request...")
        response = requests.get(
            base_url,
            params=params,
            timeout=30
        )
        print(f"Response status code: {response.status_code}")
        # The final URL contains the api_key query parameter; mask it so the
        # credential is never written into saved notebook output.
        safe_url = re.sub(r'([?&]api_key=)[^&#]*', r'\1<redacted>', str(response.url))
        print(f"Request URL: {safe_url}")

        data = response.json()

        if response.status_code == 200:
            price_range = data.get('price_range', {})

            result = {
                'predicted_price': data.get('predicted_price'),
                'lower_bound': price_range.get('lower_bound'),
                'upper_bound': price_range.get('upper_bound'),
                'specs': data.get('specs', {})
            }

            # All three figures are formatted below and compared by the
            # caller, so an incomplete prediction is reported as "no data"
            # explicitly instead of surfacing as a format-spec TypeError.
            required = ('predicted_price', 'lower_bound', 'upper_bound')
            if any(result[field] is None for field in required):
                print("\nAPI response did not include a complete price prediction")
                return None

            print("\nReceived market values:")
            print(f"Predicted Price: ${result['predicted_price']:,.2f}")
            print(f"Price Range: ${result['lower_bound']:,.2f} - ${result['upper_bound']:,.2f}")

            return result
        else:
            # Gateway errors arrive as {'fault': {'faultstring': ..., 'detail': {...}}}
            if 'fault' in data:
                fault = data['fault']
                print(f"\nAPI Error: {fault.get('faultstring', 'Unknown error')}")
                if 'detail' in fault:
                    print(f"Error code: {fault['detail'].get('errorcode', 'Unknown')}")
            else:
                print(f"\nError response: {data}")
            return None

    except Exception as e:
        # Best-effort lookup: report the problem and signal "no data" to the caller.
        print(f"\nException occurred: {e}")
        return None

# Pull the scraped listings into a DataFrame and show the row count
print("Loading CSV file...")
df = pd.read_csv('ksl_cars_detailed.csv')
print(f"Total rows in dataset: {df.shape[0]}")

# Clean price and mileage columns
def clean_price(price):
    """Parse a '$'-prefixed, comma-grouped price string into a float.

    Values that are not strings are handed back untouched.
    """
    if isinstance(price, str):
        stripped = price.replace('$', '')
        stripped = stripped.replace(',', '')
        return float(stripped)
    return price

def clean_mileage(mileage):
    """Parse a comma-grouped mileage string with an optional ' Miles' suffix into a float.

    Values that are not strings are handed back untouched.
    """
    if isinstance(mileage, str):
        stripped = mileage.replace(' Miles', '')
        stripped = stripped.replace(',', '')
        return float(stripped)
    return mileage

# Process only the first row
first_row = df.iloc[0].copy()
print("\nProcessing first row:")
print(first_row[['year', 'make', 'model', 'trim', 'mileage', 'price']])

# Clean the price and mileage for the first row
first_row['clean_price'] = clean_price(first_row['price'])
first_row['clean_mileage'] = clean_mileage(first_row['mileage'])

# Get market value for the first row
print("\nGetting market value...")
market_value = get_market_value(first_row)

# The analysis divides by the predicted price and formats/compares both
# bounds, so a truthy dict alone is not enough: require a non-zero prediction
# and both bounds before doing any arithmetic.
predicted_price = market_value.get('predicted_price') if market_value else None
lower_bound = market_value.get('lower_bound') if market_value else None
upper_bound = market_value.get('upper_bound') if market_value else None
has_prediction = bool(predicted_price) and lower_bound is not None and upper_bound is not None

# Set up front so the results record below never reads an unbound name.
deal_rating = None

if has_prediction:
    # Calculate price differences relative to the predicted price
    listing_price = first_row['clean_price']
    price_diff = listing_price - predicted_price
    price_diff_pct = (price_diff / predicted_price) * 100

    print("\nAnalysis Results:")
    print("-" * 50)
    print(f"Listing Price: ${listing_price:,.2f}")
    print(f"Predicted Price: ${predicted_price:,.2f}")
    print(f"Price Range: ${lower_bound:,.2f} - ${upper_bound:,.2f}")
    print(f"Price Difference: ${price_diff:,.2f}")
    print(f"Price Difference %: {price_diff_pct:.1f}%")

    # Determine deal rating: negative percentages mean the listing is below the prediction
    if price_diff_pct <= -20:
        deal_rating = "Excellent Deal"
    elif price_diff_pct <= -10:
        deal_rating = "Good Deal"
    elif price_diff_pct <= 0:
        deal_rating = "Fair Price"
    elif price_diff_pct <= 10:
        deal_rating = "Slightly High"
    elif price_diff_pct <= 20:
        deal_rating = "High"
    else:
        deal_rating = "Very High"

    print(f"Deal Rating: {deal_rating}")

    # Check if price is within predicted range
    if listing_price < lower_bound:
        print("Note: Price is below predicted range - possible great deal or potential issues")
    elif listing_price > upper_bound:
        print("Note: Price is above predicted range - may be overpriced or have special features")
    else:
        print("Note: Price is within predicted range")

else:
    print("\nNo market value data available for this vehicle")

# Save the results (prediction fields are None when no market data came back)
results = {
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'vehicle': f"{first_row['year']} {first_row['make']} {first_row['model']} {first_row['trim']}",
    'listing_price': first_row['clean_price'],
    'mileage': first_row['clean_mileage'],
    'predicted_price': predicted_price,
    'price_range_low': lower_bound,
    'price_range_high': upper_bound,
    'deal_rating': deal_rating,
    'specs': market_value.get('specs') if market_value else None
}

# Save to JSON for reference
with open('latest_analysis.json', 'w') as f:
    json.dump(results, f, indent=2)

Loading CSV file...
Total rows in dataset: 24

Processing first row:
year                2017
make               Buick
model            Cascada
trim       Sport Touring
mileage     99,749 Miles
price            $12,997
Name: 0, dtype: object

Getting market value...

Making API request for:
Year: 2017
Make: Buick
Model: Cascada
Trim: Sport Touring
Mileage: 99749

Sending API request...
Response status code: 200
Request URL: https://mc-api.marketcheck.com/v2/predict/car/price?api_key=<REDACTED>&car_type=used&year=2017&make=buick&model=cascada&trim=Sport+Touring&miles=99749&latitude=40.7608&longitude=-111.891

Received market values:
Predicted Price: $14,696.00
Price Range: $14,505.00 - $14,886.00

Analysis Results:
--------------------------------------------------
Listing Price: $12,997.00
Predicted Price: $14,696.00
Price Range: $14,505.00 - $14,886.00
Price Difference: $-1,699.00
Price Difference %: -11.6%
Deal Rating: Good Deal
Note: Price is below predicted range - possible great deal or potential issues

Full-dataset test: runs the price-prediction analysis on every listing in the .csv file (not yet revised)

In [12]:
import pandas as pd
import requests
import json
from datetime import datetime
import numpy as np
import os
from dotenv import load_dotenv
from tqdm import tqdm  # For progress bar

def get_market_value(row):
    """Request a predicted price for one used vehicle from the Marketcheck price-prediction API.

    Written for batch use: any problem with a single listing is reported and
    turned into a None result so the caller's loop can continue.

    Args:
        row: Mapping-like record (for example a pandas Series) providing
            'year', 'make', 'model', 'trim' and 'mileage'. 'mileage' may be
            numeric or text such as '99,749 Miles'.

    Returns:
        A dict with 'predicted_price', 'lower_bound', 'upper_bound' and
        'specs' on HTTP 200 with a predicted price. None when the listing's
        fields cannot be prepared, the status code is not 200, the response
        has no predicted price, or the request / JSON decoding raises.

    Raises:
        ValueError: If MARKETCHECK_API_KEY is not set in the environment.
    """
    load_dotenv()
    API_KEY = os.getenv('MARKETCHECK_API_KEY')
    if not API_KEY:
        raise ValueError("MARKETCHECK_API_KEY not found in environment variables")

    base_url = "https://mc-api.marketcheck.com/v2/predict/car/price"
    vehicle_label = f"{row['year']} {row['make']} {row['model']}"

    try:
        # Parameter preparation lives inside the try block: a missing (NaN)
        # mileage, make or model raises here and must skip only this listing.
        clean_mileage = float(str(row['mileage']).replace(' Miles', '').replace(',', ''))
        mileage = int(clean_mileage)

        # Parameters for the price prediction API
        params = {
            'api_key': API_KEY,
            'car_type': 'used',
            'year': row['year'],
            'make': row['make'].lower(),  # API expects lowercase
            'model': row['model'].lower(),
            'trim': row['trim'],
            'miles': mileage,
            'latitude': 40.7608,  # Salt Lake City
            'longitude': -111.8910
        }

        response = requests.get(
            base_url,
            params=params,
            timeout=30
        )

        # Check the status before decoding so a non-JSON error body is still
        # reported by its status code.
        if response.status_code != 200:
            print(f"\nAPI returned status {response.status_code} for {vehicle_label}")
            return None

        data = response.json()

        predicted_price = data.get('predicted_price')
        if predicted_price is None:
            # Without a prediction there is nothing to compare the listing against.
            print(f"\nNo predicted price returned for {vehicle_label}")
            return None

        price_range = data.get('price_range') or {}
        return {
            'predicted_price': predicted_price,
            'lower_bound': price_range.get('lower_bound'),
            'upper_bound': price_range.get('upper_bound'),
            'specs': data.get('specs', {})
        }

    except Exception as e:
        print(f"\nException occurred for {vehicle_label}: {e}")
        return None

def analyze_listing(row, market_value):
    """Compare a listing's asking price with its predicted market price.

    Args:
        row: Mapping-like record providing 'clean_price' (numeric asking price).
        market_value: Dict with 'predicted_price', 'lower_bound' and
            'upper_bound', or None / empty when no market data is available.

    Returns:
        dict with 'listing_price', 'predicted_price', 'price_range_low',
        'price_range_high', 'price_difference', 'price_difference_pct',
        'deal_rating' and 'price_position'. When the listing price or the
        predicted price is missing (None/NaN) or the predicted price is zero,
        the numeric comparison fields are None, 'deal_rating' is 'No Data' and
        'price_position' is 'Unknown'.
    """
    listing_price = row['clean_price']
    predicted_price = market_value.get('predicted_price') if market_value else None

    # No percentage can be computed without both prices or with a zero
    # divisor. NaN must be caught explicitly: every comparison with NaN is
    # False, so it would otherwise fall through to the "Very High" rating.
    if pd.isna(listing_price) or pd.isna(predicted_price) or predicted_price == 0:
        return {
            'listing_price': listing_price,
            'predicted_price': None,
            'price_range_low': None,
            'price_range_high': None,
            'price_difference': None,
            'price_difference_pct': None,
            'deal_rating': 'No Data',
            'price_position': 'Unknown'
        }

    price_diff = listing_price - predicted_price
    price_diff_pct = (price_diff / predicted_price) * 100

    # Determine deal rating: first ceiling (inclusive) the percentage fits under
    rating_ceilings = (
        (-20, "Excellent Deal"),
        (-10, "Good Deal"),
        (0, "Fair Price"),
        (10, "Slightly High"),
        (20, "High"),
    )
    deal_rating = "Very High"
    for ceiling, label in rating_ceilings:
        if price_diff_pct <= ceiling:
            deal_rating = label
            break

    # Determine price position relative to predicted range
    lower_bound = market_value.get('lower_bound')
    upper_bound = market_value.get('upper_bound')
    if pd.isna(lower_bound) or pd.isna(upper_bound):
        # A prediction without a range can still be rated, just not positioned.
        price_position = "Unknown"
    elif listing_price < lower_bound:
        price_position = "Below Range"
    elif listing_price > upper_bound:
        price_position = "Above Range"
    else:
        price_position = "Within Range"

    return {
        'listing_price': listing_price,
        'predicted_price': predicted_price,
        'price_range_low': lower_bound,
        'price_range_high': upper_bound,
        'price_difference': price_diff,
        'price_difference_pct': price_diff_pct,
        'deal_rating': deal_rating,
        'price_position': price_position
    }

# Read the scraped listings from disk; cleaning happens further down
print("Loading CSV file...")
df = pd.read_csv('ksl_cars_detailed.csv')
print(f"Total rows in dataset: {df.shape[0]}")

# Clean price and mileage columns
def clean_price(price):
    """Return a float for price text like '$12,997'; pass any non-string value through as-is."""
    if not isinstance(price, str):
        return price
    numeric_text = price.replace('$', '').replace(',', '')
    return float(numeric_text)

def clean_mileage(mileage):
    """Return a float for mileage text like '99,749 Miles'; pass any non-string value through as-is."""
    if not isinstance(mileage, str):
        return mileage
    numeric_text = mileage.replace(' Miles', '').replace(',', '')
    return float(numeric_text)

# Derive numeric price / mileage columns from the raw listing text
df['clean_price'] = df['price'].apply(clean_price)
df['clean_mileage'] = df['mileage'].apply(clean_mileage)

# Query the API once per listing and collect one flat record per vehicle
results = []
print("\nProcessing all listings...")
for idx, row in tqdm(df.iterrows(), total=len(df)):
    market_value = get_market_value(row)
    analysis = analyze_listing(row, market_value)

    # Identifying fields first, then every key produced by the analysis
    result = {
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'vehicle': f"{row['year']} {row['make']} {row['model']} {row['trim']}",
        'mileage': row['clean_mileage'],
    }
    result.update(analysis)

    results.append(result)

# One row per listing, built in a single pass from the collected records
results_df = pd.DataFrame(results)

# Persist the full records as JSON ...
print("\nSaving detailed results to JSON...")
with open('full_analysis.json', 'w') as f:
    json.dump(results, f, indent=2)

# ... and the tabular view as CSV
print("Saving summary to CSV...")
results_df.to_csv('analysis_summary.csv', index=False)

# Headline counts
print("\nAnalysis Summary:")
print("-" * 50)
print(f"Total listings analyzed: {len(results_df)}")
print("\nDeal Rating Distribution:")
print(results_df['deal_rating'].value_counts())
print("\nPrice Position Distribution:")
print(results_df['price_position'].value_counts())

# Listings rated "Excellent Deal", largest discount (most negative percentage) first
print("\nTop 5 Best Deals:")
best_deals = results_df.loc[results_df['deal_rating'] == 'Excellent Deal'].sort_values('price_difference_pct')
if len(best_deals) > 0:
    for _, deal in best_deals.head(5).iterrows():
        print(f"\n{deal['vehicle']}")
        print(f"Listed: ${deal['listing_price']:,.2f}")
        print(f"Predicted: ${deal['predicted_price']:,.2f}")
        print(f"Difference: {deal['price_difference_pct']:.1f}%")

print("\nAnalysis complete! Check 'full_analysis.json' and 'analysis_summary.csv' for detailed results.")

Loading CSV file...
Total rows in dataset: 24

Processing all listings...


100%|██████████| 24/24 [00:09<00:00,  2.47it/s]


Saving detailed results to JSON...
Saving summary to CSV...

Analysis Summary:
--------------------------------------------------
Total listings analyzed: 24

Deal Rating Distribution:
deal_rating
Excellent Deal    8
Very High         5
Fair Price        4
Good Deal         4
Slightly High     2
No Data           1
Name: count, dtype: int64

Price Position Distribution:
price_position
Below Range     15
Above Range      5
Within Range     3
Unknown          1
Name: count, dtype: int64

Top 5 Best Deals:

2007 BMW Z4 3.0si
Listed: $8,642.00
Predicted: $34,511.00
Difference: -75.0%

2022 Ford Mustang EcoBoost Premium
Listed: $21,228.00
Predicted: $33,634.00
Difference: -36.9%

2022 Ford Mustang EcoBoost Premium
Listed: $21,540.00
Predicted: $32,771.00
Difference: -34.3%

2018 Audi A5 2.0T quattro Premium Plus
Listed: $27,900.00
Predicted: $41,034.00
Difference: -32.0%

2024 Ford Bronco Outer Banks
Listed: $51,020.00
Predicted: $73,065.00
Difference: -30.2%

Analysis complete! Check 'full_analysis.json' and 'analysis_summary.csv' for detailed results.


