Libraries.

In [1]:
import csv
import re

Acquire variables.

In [2]:
def _is_character_value(value):
    """Return True when a CSV cell should be declared as a character ($) variable."""
    text = value.strip()
    if text.isalpha():
        # Purely alphabetic cells are always character data, including words
        # such as "nan" or "inf" that float() would otherwise accept.
        return True
    if text in ("", "."):
        # Blank cells and a lone period stay numeric (no "$"), as before.
        return False
    try:
        float(text)
    except ValueError:
        # Not a number, e.g. "part-time", "New York" or "2023-09-01".
        return True
    return False


def acquire_variables(Ex_Num):
    """Build the variable list for a SAS input statement from an exercise CSV.

    Reads ./Data/Exercise{Ex_Num}Data.csv, takes the variable names from the
    header row and inspects the first data row to decide which columns hold
    character data. Character columns get a trailing "$".

    Parameters:
        Ex_Num: value interpolated into the file name.

    Returns:
        A lowercase string in which every variable name is preceded by a
        single space, e.g. " age gender$ score".

    Raises:
        IndexError: if the file has fewer than two rows, or the first data
            row has fewer cells than the header.
        OSError: if the file cannot be opened.
    """
    path = f"./Data/Exercise{Ex_Num}Data.csv"
    # newline='' is the mode the csv module documents for files handed to csv.reader.
    with open(path, mode='r', newline='') as file:
        rows = list(csv.reader(file))
    header = rows[0]
    first_row = rows[1]

    bind = ""
    for col in range(len(header)):
        suffix = "$" if _is_character_value(first_row[col]) else ""
        bind += f" {header[col]}{suffix}"
    # Standardizes all components to be lowercase.
    return bind.lower()

Call the data.

In [3]:
def call_data(Ex_Name,Ex_Num,variables,transformed):
    """Return the SAS data step that loads the exercise CSV.

    Ex_Name names the dataset, Ex_Num selects the CSV file, variables is
    placed directly after the input keyword, and transformed is appended
    right after the input statement.
    """
    statements = [
        "/* Call data. */",
        f"data {Ex_Name};",
        f" infile '../../Data/Exercise{Ex_Num}Data.csv' dlm=',' firstobs = 2;",
        f" input{variables};{transformed}",
        "run; ",
    ]
    return "\n".join(statements)

Modify y variables.

In [4]:
def modify_y(y_val):
    """Ask for a new response name and the text to append to the old response.

    Returns a tuple of the new name and an assignment statement (preceded
    by a line break) of the form "<name> = <y_val><entered text>;".
    """
    renamed = input("Set name: ")
    suffix = input("Set new value: ")
    statement = "\n{} = {}{};".format(renamed, y_val, suffix)
    return renamed, statement

Modify x variables.

In [5]:
def modify_x(predictors):
    """Interactively rename predictors and collect their transformation statements.

    Repeatedly shows the current predictor string and asks for a predictor
    to replace. Entering "n" ends the loop. For every other non-blank entry
    the user supplies a new name and the text appended to the old predictor;
    whole-word occurrences of the old name in predictors are replaced by the
    new name and "<new> = <old><text>;" is added to the returned statements.

    Parameters:
        predictors: space-separated predictor string.

    Returns:
        A tuple of the updated predictor string and the accumulated
        statements, each preceded by a line break.
    """
    equation = ""
    while True:
        # Asks for user input to discard. n breaks cycle
        print(f"Current selection:{predictors}")
        purge = input("Write original predictor: ")
        if purge == "n":
            break
        if not purge.strip():
            # A blank pattern would match at every word boundary and splice
            # the new name throughout the list, so ask again instead.
            continue
        new = input(f"Set name for {purge}: ")
        # Escape the typed name so it is matched literally, and use a callable
        # replacement so backslashes in the new name are not read as group
        # references.
        predictors = re.sub(rf'\b{re.escape(purge)}\b', lambda _match: new, predictors)
        alter = input("Set new value: ")
        equation += (f"\n{new} = {purge}{alter};")
    return predictors,equation

Test for normality.

In [6]:
def test_normality(Ex_Num,y_val):
    histogram = (f"""/* Test normality. */
ods listing gpath='../../temp outputs';
ods graphics / imagename = "histogram" imagefmt = png;
proc univariate;
 var {y_val};
 histogram/normal;
 ODS select Histogram;
 ODS select GoodnessOfFit;
run;""")
    return(histogram)

Fitted model.

In [7]:
def fitted_model(Ex_Name,y_val,predictors,dist,link):
    """Return the SAS proc genmod step for the fitted model.

    Predictors carrying a "$" marker are listed in the class statement;
    the marker is stripped everywhere before the text is emitted.
    """
    # Tokens flagged with "$" are the categorical predictors.
    flagged = [token for token in predictors.split() if "$" in token]
    class_names = " ".join(flagged).replace("$", "")
    model_terms = predictors.replace("$", "")
    sas_lines = (
        "/* Fitted model. Uses ods output to enable copying. */",
        "ods output ModelFit=ModelFit;",
        "ods output ParameterEstimates=ParameterEstimates;",
        f"proc genmod data={Ex_Name};",
        f" class {class_names};",
        f" model {y_val}={model_terms}/dist={dist} link={link};",
        " ODS select ModelFit;",
        " ODS select ParameterEstimates;",
        "run;",
    )
    return "\n".join(sas_lines)

Null model.

In [8]:
def null_model(Ex_Name,y_val,dist,link):
    """Return the SAS proc genmod step for the model with no predictors."""
    header = "/* Null model. Uses ods output to enable copying.*/\n"
    setup = "ods output ModelFit=ModelFit;\n" + f"proc genmod data={Ex_Name};\n"
    model = f" model {y_val}=/dist={dist} link={link};\n"
    footer = " ODS select ModelFit;\n ODS select ParameterEstimates;\nrun;"
    return header + setup + model + footer

Get values for prediction.

In [9]:
def predicted(predictors,predictions):
    """Return the SAS data step that creates the predict dataset.

    predictors goes into the input statement and predictions is placed on
    the line that follows cards.
    """
    template = "/* Set prediction values. */\ndata predict;\ninput {}; cards;\n{}\nrun;"
    return template.format(predictors, predictions)

Calculate predicted.

In [10]:
def predicted_model(y_val,predictors,dist,link):
    """Return the SAS proc genmod step that writes predictions to outdata.

    Predictors carrying a "$" marker are listed in the class statement, and
    the predicted value is stored in a variable named "p" followed by y_val.
    """
    categorical = []
    for token in predictors.split():
        if "$" in token:
            categorical.append(token)
    class_names = " ".join(categorical).replace("$", "")
    model_terms = predictors.replace("$", "")
    pieces = [
        "/* Predicted model. */",
        "proc genmod;",
        " class " + class_names + ";",
        f" model {y_val} ={model_terms}/dist={dist} link={link};",
        f" output out=outdata p=p{y_val};",
        " ODS select ModelFit;",
        " ODS select ParameterEstimates;",
        "run;",
    ]
    return "\n".join(pieces)

Select all inputs.

In [11]:
# Dataset name and exercise number; the number selects which CSV is read.
Ex_Name = input("give name: ")
Ex_Num = input("give num: ")
variables = acquire_variables(Ex_Num)
print(f"Original variables are {variables}")

# Designate y value. Removes it. Describe predictors.
# The response is matched literally as a whole token, together with its "$"
# marker if it has one, so no stray "$" is left attached to the previous
# predictor.
y_val = input("give response: ").lower().replace("$","")
predictors = re.sub(rf' {re.escape(y_val)}\$?(?=\s|$)',"",variables)
print(f"Predictors are {predictors}")

# Transform data as needed. Answering "y" to the first prompt transforms the
# response; answering "x" to the second transforms predictors.
tag = input("y to modify").lower()
temp = ""
transformed = ""
if tag == "y":
    y_val,temp = modify_y(y_val)
transformed += temp
temp = ""
tag = input("x to modify").lower()
if tag == "x":
    predictors,temp = modify_x(predictors)
transformed += temp

# Distribution, link, prediction values.
dist = input("give distribution: ").lower()
link = input("give link function: ").lower()
predictions = input("give values: ")

Original variables are  age gender$ quiettime nchildren stresslevel jobstatus$ nactivities pastvac sleephours


Build each section of the SAS program by calling the functions defined above.

In [None]:
#Take variables
# Generate every SAS section from the collected inputs.
initial_data = call_data(
    Ex_Name=Ex_Name, Ex_Num=Ex_Num, variables=variables, transformed=transformed
)
test_normality_v = test_normality(Ex_Num=Ex_Num, y_val=y_val)
fitted_model_v = fitted_model(
    Ex_Name=Ex_Name, y_val=y_val, predictors=predictors, dist=dist, link=link
)
null_model_v = null_model(Ex_Name=Ex_Name, y_val=y_val, dist=dist, link=link)
predicted_v = predicted(predictors=predictors, predictions=predictions)
predicted_model_v = predicted_model(
    y_val=y_val, predictors=predictors, dist=dist, link=link
)

Write the assembled SAS program to `output.sas`.

In [None]:
# Assemble the complete SAS program from the generated sections plus the fixed
# glue steps (macro copies of the log likelihoods, the deviance test, and the
# prediction printout).
# NOTE(review): the proc export outfile below is an absolute path on one
# machine; adjust it before running elsewhere.
sas_program = f"""{initial_data}

{test_normality_v}

{fitted_model_v}

/* Copy log likelihood from general model's ModelFit. */
data _null_;
set ModelFit;
if Criterion="Log Likelihood" then call symputx("FittedLogLike", Value);
run;
%put LogLike = &FittedLogLike;

/* Calculates degrees of freedom for deviance testing. Counts rows from ParameterEstimates with DF = 1 then subtracts 2, with 2 representing the number of parameters of the null model.*/
proc sql noprint;
select (count(DF)-2) into :Rows from ParameterEstimates where DF = 1;
quit;
run;
%put Rows = &Rows;

/* Outputs ParameterEstimates to a file for Python to write out a regression analysis. */
proc export data=ParameterEstimates
outfile = "C:/Users/3sekk/Desktop/fall 23 stats/410/text.csv"
dbms = csv replace;
delimiter = ',';
run;

{null_model_v}

/* Copy log likelihood from null model's ModelFit. */
data _null_;
set ModelFit;
if Criterion="Log Likelihood" then call symputx("NullLogLike", Value);
run;
%put LogLike = &NullLogLike;

/* Deviance test via macros. */
data deviance_test;
deviance=-2*(&NullLogLike-(&FittedLogLike));
pvalue=1-probchi(deviance,&Rows);
run;
proc print noobs;
run;

{predicted_v}

/* Plop into dataset. */
data {Ex_Name};
set {Ex_Name} predict;
run;

{predicted_model_v}

/* Acquire the final row. */
proc sql noprint;
select (count(*)) into :Rows from outdata;
quit;
run;
%put Rows = &Rows;
/* Output prediction. */
proc print data=outdata (firstobs=&Rows) noobs;
var p{y_val};
run; 

/* Reset data. If this isn't present, rerunning this will cause the dataset to keep appending predictions. */
proc delete data={Ex_Name};
run;
"""

# The with block closes output.sas as soon as the program has been written.
with open('output.sas','w') as sas_file:
    print(sas_program, file=sas_file)