In [2]:
import pandas as pd

# Load the dataset into a DataFrame; the path is relative to the notebook's
# working directory.
df = pd.read_csv('csv/food_rows.csv')

In [3]:
# Lift pandas' row-display limit so the full list of distinct values is shown.
# This is a session-wide option and stays in effect for later cells.
pd.set_option('display.max_rows', None)
# Frequency of each distinct raw DAYS string, used to survey the formats in use.
df['DAYS'].value_counts()

DAYS
WED                         69
SAT                         69
THUR                        52
TUE                         40
FRI                         30
MON-FRI                     19
SUN                         14
TUE,THUR                    14
MON                         12
SAT (1,3)                    7
SAT (2,4)                    6
MON-THUR                     6
THUR (1,3)                   5
SAT(1,2,3,4)                 4
TUE/THUR                     4
TUE-FRI                      4
MON,WED,FRI                  4
SAT(2,4)                     4
TUE (2,4)                    4
TUE,WED,THUR                 3
WED,THUR                     3
WED (2,4)                    3
MON,THUR                     3
TUE-THUR                     3
TUE,WED                      3
WED,FRI                      3
MON,WED                      3
WED/SAT                      3
MON-SUN                      2
FRI (4TH)                    2
THUR,SAT                     2
WED/THUR                     2
TUE

In [4]:
import re

# Step 1: Extract the week numbers into a new column
# Anything written inside parentheses (e.g. "SAT (1,3)") is captured as a list
# of strings. Non-string cells (such as NaN for a blank DAYS field) would make
# re.findall raise TypeError, so they are treated as "no qualifier" instead.
df['WEEKS_OPEN'] = df['DAYS'].apply(
    lambda x: re.findall(r'\((.*?)\)', x) if isinstance(x, str) else []
)

# Step 2: Clean the DAYS column by removing the parentheses and their contents
# Non-string cells are passed through unchanged rather than crashing re.sub.
df['DAYS'] = df['DAYS'].apply(
    lambda x: re.sub(r'\(.*?\)', '', x).strip() if isinstance(x, str) else x
)

# Display the updated DataFrame to see the changes
print(df[['DAYS', 'WEEKS_OPEN']].head())

# Optional: If you want to convert WEEKS_OPEN from a list to a string for easier viewing/filtering
# Several parenthesised groups in one cell are joined with commas; an empty
# list (no qualifier) becomes None.
df['WEEKS_OPEN'] = df['WEEKS_OPEN'].apply(lambda x: ','.join(x) if x else None)

# Display the updated DataFrame
print(df[['DAYS', 'WEEKS_OPEN']].head())

       DAYS WEEKS_OPEN
0       SUN      [2,4]
1      THUR         []
2  M,W,THUR         []
3      THUR         []
4       TUE         []
       DAYS WEEKS_OPEN
0       SUN        2,4
1      THUR       None
2  M,W,THUR       None
3      THUR       None
4       TUE       None


In [5]:
df['WEEKS_OPEN'].value_counts() 

WEEKS_OPEN
2,4                29
1,3                26
1,2,3,4             9
2ND                 6
3RD                 5
4TH                 3
3                   3
4                   3
1                   3
2                   2
1ST                 2
2,3,4               2
4,2                 2
1,2,3               2
3,1                 2
CLOSED 5TH WED      1
1st                 1
,4                  1
4TH ONLY            1
1,2,4,5,3           1
1,3,2,4             1
1ST,3RD             1
BY APPT             1
SR ONLY             1
LAST                1
1,2,3,4,1,2,3,4     1
2,3                 1
1,3,5,2,4           1
1,2,3,4,4           1
2,LAST              1
Name: count, dtype: int64

In [6]:
# Canonical day labels, Monday first; these double as the indicator column names
full_days = ['MON', 'TUE', 'WED', 'THUR', 'FRI', 'SAT', 'SUN']

# Lookup from every accepted spelling to its canonical label, built from
# per-day alias groups so each day's spellings sit together
day_mapping = {
    alias: canonical
    for canonical, aliases in (
        ('MON', ('M', 'MON')),
        ('TUE', ('TU', 'TUE')),
        ('WED', ('W', 'WED')),
        ('THUR', ('TH', 'THU', 'THUR')),
        ('FRI', ('F', 'FRI')),
        ('SAT', ('SAT',)),
        ('SUN', ('SUN',)),
    )
    for alias in aliases
}

# Start every indicator column at 0; matching rows are switched to 1 later
for day in full_days:
    df[day] = 0

# Function to expand day ranges like MON-FRI
def expand_day_range(day_range, ordered_days=None, aliases=None):
    """Expand a hyphenated range such as 'MON-FRI' into its individual days.

    Args:
        day_range: Text of the form '<start>-<end>'. Endpoints may be any
            spelling known to ``aliases``; case and surrounding whitespace
            are ignored.
        ordered_days: Canonical day names in week order. Defaults to the
            notebook-level ``full_days``.
        aliases: Mapping from accepted spellings to canonical names. Defaults
            to the notebook-level ``day_mapping``.

    Returns:
        A new list of canonical day names from start to end, inclusive. When
        the start falls later in the week than the end (e.g. 'SAT-MON'), the
        range wraps around the end of the week instead of coming back empty.

    Raises:
        ValueError: If the text does not have exactly two endpoints or an
            endpoint is not a recognised day.
    """
    if ordered_days is None:
        ordered_days = full_days
    if aliases is None:
        aliases = day_mapping

    endpoints = [part.strip().upper() for part in day_range.split('-')]
    if len(endpoints) != 2:
        raise ValueError(f"Expected a range like 'MON-FRI', got {day_range!r}")

    start_day, end_day = (aliases.get(part, part) for part in endpoints)
    for name in (start_day, end_day):
        if name not in ordered_days:
            raise ValueError(f"Unrecognized day {name!r} in range {day_range!r}")

    start_idx = ordered_days.index(start_day)
    end_idx = ordered_days.index(end_day)
    if start_idx <= end_idx:
        return list(ordered_days[start_idx:end_idx + 1])
    # Start is later in the week than end: run to the end of the week, then
    # continue from the first day. A plain slice would yield [] here.
    return list(ordered_days[start_idx:]) + list(ordered_days[:end_idx + 1])

# Function to clean, split, and map the days string
def process_days(days_str, ordered_days=None, aliases=None):
    """Turn a free-form DAYS value into a list of canonical day names.

    Spaces are removed, '/' is treated like ',', and the text is upper-cased
    before splitting on commas. Hyphenated tokens are expanded with
    ``expand_day_range``; other tokens are translated through ``aliases`` and
    kept as-is (upper-cased) when no alias exists, so the caller decides what
    to do with unrecognised text.

    Args:
        days_str: Raw cell value. Anything that is not a string (for example
            NaN) yields an empty list.
        ordered_days: Passed through to ``expand_day_range``; defaults to the
            notebook-level ``full_days``.
        aliases: Mapping from accepted spellings to canonical names; defaults
            to the notebook-level ``day_mapping``.

    Returns:
        List of day tokens in the order they appear. Duplicates are not
        removed; empty tokens from stray commas are skipped.

    Raises:
        ValueError: Propagated from ``expand_day_range`` for a malformed range.
    """
    if ordered_days is None:
        ordered_days = full_days
    if aliases is None:
        aliases = day_mapping

    # Non-string cells have no .replace; treat them as "no days listed".
    if not isinstance(days_str, str):
        return []

    normalized = days_str.replace(" ", "").replace("/", ",").upper()

    expanded_days = []
    for token in normalized.split(','):
        if not token:  # produced by leading, trailing, or doubled commas
            continue
        if '-' in token:  # Handle ranges like MON-FRI
            expanded_days.extend(expand_day_range(token, ordered_days, aliases))
        else:
            # Map abbreviations to full day names
            expanded_days.append(aliases.get(token, token))

    return expanded_days

# Populate the columns based on the processed days
# Only tokens that are canonical day names may set a flag. Testing against
# df.columns instead would let a token that happens to equal some other column
# name (for example "DAYS") overwrite that column's value with 1.
for index, days_value in df['DAYS'].items():
    for day in process_days(days_value):
        if day in full_days:
            df.at[index, day] = 1

# Display the updated DataFrame to check the transformation
print(df[['DAYS', 'MON', 'TUE', 'WED', 'THUR', 'FRI', 'SAT', 'SUN']].head())

       DAYS  MON  TUE  WED  THUR  FRI  SAT  SUN
0       SUN    0    0    0     0    0    0    1
1      THUR    0    0    0     1    0    0    0
2  M,W,THUR    1    0    1     1    0    0    0
3      THUR    0    0    0     1    0    0    0
4       TUE    0    1    0     0    0    0    0


In [7]:
# Pull the cleaned DAYS text next to its seven indicator columns for a visual check
day_flag_columns = ['DAYS', 'MON', 'TUE', 'WED', 'THUR', 'FRI', 'SAT', 'SUN']
days = df[day_flag_columns]
days


Unnamed: 0,DAYS,MON,TUE,WED,THUR,FRI,SAT,SUN
0,SUN,0,0,0,0,0,0,1
1,THUR,0,0,0,1,0,0,0
2,"M,W,THUR",1,0,1,1,0,0,0
3,THUR,0,0,0,1,0,0,0
4,TUE,0,1,0,0,0,0,0
5,TUE,0,1,0,0,0,0,0
6,"TUE, THUR,FRI",0,1,0,1,1,0,0
7,THUR,0,0,0,1,0,0,0
8,MON-FRI,1,1,1,1,1,0,0
9,TUE,0,1,0,0,0,0,0


In [32]:
# Lower-case the seven day indicator columns so they match the database column names
df.rename(
    columns={
        name: name.lower()
        for name in ("MON", "TUE", "WED", "THUR", "FRI", "SAT", "SUN")
    },
    inplace=True,
)

In [40]:
# Write the transformed frame to CSV without the index column.
# NOTE(review): this is the same path the data was loaded from, so the source
# file is replaced. The saved DAYS values no longer contain their parenthesised
# part, so running this notebook again on the saved file would find nothing to
# extract into WEEKS_OPEN. Consider writing to a separate output file; confirm
# what downstream consumers expect before changing the path.
df.to_csv('csv/food_rows.csv', index=False)